X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=savevm.c;h=31dcce975ed40827db75df2d8ef7024f022e6f97;hb=f3f4d2c09b9cf46903ba38425ec46c44185162bd;hp=38699de4a3c78c4329538bc3b342b2ab00f559f2;hpb=817b9ed5eb300dbb434d752da416441028539a96;p=qemu.git diff --git a/savevm.c b/savevm.c index 38699de4a..31dcce975 100644 --- a/savevm.c +++ b/savevm.c @@ -39,6 +39,7 @@ #include "qmp-commands.h" #include "trace.h" #include "qemu/bitops.h" +#include "qemu/iov.h" #define SELF_ANNOUNCE_ROUNDS 5 @@ -113,18 +114,24 @@ void qemu_announce_self(void) /* savevm/loadvm support */ #define IO_BUF_SIZE 32768 +#define MAX_IOV_SIZE MIN(IOV_MAX, 64) struct QEMUFile { const QEMUFileOps *ops; void *opaque; - int is_write; - int64_t buf_offset; /* start of buffer when writing, end of buffer - when reading */ + int64_t bytes_xfer; + int64_t xfer_limit; + + int64_t pos; /* start of buffer when writing, end of buffer + when reading */ int buf_index; int buf_size; /* 0 when writing */ uint8_t buf[IO_BUF_SIZE]; + struct iovec iov[MAX_IOV_SIZE]; + unsigned int iovcnt; + int last_error; }; @@ -168,6 +175,20 @@ static void coroutine_fn yield_until_fd_readable(int fd) qemu_coroutine_yield(); } +static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, + int64_t pos) +{ + QEMUFileSocket *s = opaque; + ssize_t len; + ssize_t size = iov_size(iov, iovcnt); + + len = iov_send(s->fd, iov, iovcnt, 0, size); + if (len < size) { + len = -socket_error(); + } + return len; +} + static int socket_get_fd(void *opaque) { QEMUFileSocket *s = opaque; @@ -247,6 +268,9 @@ static int stdio_pclose(void *opaque) ret = pclose(s->stdio_file); if (ret == -1) { ret = -errno; + } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) { + /* close succeeded, but non-zero exit code: */ + ret = -EIO; /* fake errno value */ } g_free(s); return ret; @@ -256,6 +280,24 @@ static int stdio_fclose(void *opaque) { QEMUFileStdio *s = opaque; int ret = 0; + + if (s->file->ops->put_buffer || s->file->ops->writev_buffer) { + int fd = fileno(s->stdio_file); + struct stat st; + + ret = fstat(fd, &st); + if (ret == 0 && S_ISREG(st.st_mode)) { + /* + * If the file handle is a regular file make sure the + * data is flushed to disk before signaling success. + */ + ret = fsync(fd); + if (ret != 0) { + ret = -errno; + return ret; + } + } + } if (fclose(s->stdio_file) == EOF) { ret = -errno; } @@ -314,9 +356,94 @@ static const QEMUFileOps stdio_file_write_ops = { .close = stdio_fclose }; +static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, + int64_t pos) +{ + QEMUFileSocket *s = opaque; + ssize_t len, offset; + ssize_t size = iov_size(iov, iovcnt); + ssize_t total = 0; + + assert(iovcnt > 0); + offset = 0; + while (size > 0) { + /* Find the next start position; skip all full-sized vector elements */ + while (offset >= iov[0].iov_len) { + offset -= iov[0].iov_len; + iov++, iovcnt--; + } + + /* skip `offset' bytes from the (now) first element, undo it on exit */ + assert(iovcnt > 0); + iov[0].iov_base += offset; + iov[0].iov_len -= offset; + + do { + len = writev(s->fd, iov, iovcnt); + } while (len == -1 && errno == EINTR); + if (len == -1) { + return -errno; + } + + /* Undo the changes above */ + iov[0].iov_base -= offset; + iov[0].iov_len += offset; + + /* Prepare for the next iteration */ + offset += len; + total += len; + size -= len; + } + + return total; +} + +static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +{ + QEMUFileSocket *s = opaque; + ssize_t len; + + for (;;) { + len = read(s->fd, buf, size); + if (len != -1) { + break; + } + if (errno == EAGAIN) { + yield_until_fd_readable(s->fd); + } else if (errno != EINTR) { + break; + } + } + + if (len == -1) { + len = -errno; + } + return len; +} + +static int unix_close(void *opaque) +{ + QEMUFileSocket *s = opaque; + close(s->fd); + g_free(s); + return 0; +} + +static const QEMUFileOps unix_read_ops = { + .get_fd = socket_get_fd, + .get_buffer = unix_get_buffer, + .close = unix_close +}; + +static const QEMUFileOps unix_write_ops = { + .get_fd = socket_get_fd, + .writev_buffer = unix_writev_buffer, + .close = unix_close +}; + QEMUFile *qemu_fdopen(int fd, const char *mode) { - QEMUFileStdio *s; + QEMUFileSocket *s; if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || @@ -325,21 +452,15 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) return NULL; } - s = g_malloc0(sizeof(QEMUFileStdio)); - s->stdio_file = fdopen(fd, mode); - if (!s->stdio_file) - goto fail; + s = g_malloc0(sizeof(QEMUFileSocket)); + s->fd = fd; if(mode[0] == 'r') { - s->file = qemu_fopen_ops(s, &stdio_file_read_ops); + s->file = qemu_fopen_ops(s, &unix_read_ops); } else { - s->file = qemu_fopen_ops(s, &stdio_file_write_ops); + s->file = qemu_fopen_ops(s, &unix_write_ops); } return s->file; - -fail: - g_free(s); - return NULL; } static const QEMUFileOps socket_read_ops = { @@ -348,12 +469,30 @@ static const QEMUFileOps socket_read_ops = { .close = socket_close }; -QEMUFile *qemu_fopen_socket(int fd) +static const QEMUFileOps socket_write_ops = { + .get_fd = socket_get_fd, + .writev_buffer = socket_writev_buffer, + .close = socket_close +}; + +QEMUFile *qemu_fopen_socket(int fd, const char *mode) { QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket)); + if (mode == NULL || + (mode[0] != 'r' && mode[0] != 'w') || + mode[1] != 'b' || mode[2] != 0) { + fprintf(stderr, "qemu_fopen: Argument validity check failed\n"); + return NULL; + } + s->fd = fd; - s->file = qemu_fopen_ops(s, &socket_read_ops); + if (mode[0] == 'w') { + qemu_set_block(s->fd); + s->file = qemu_fopen_ops(s, &socket_write_ops); + } else { + s->file = qemu_fopen_ops(s, &socket_read_ops); + } return s->file; } @@ -385,6 +524,21 @@ fail: return NULL; } +static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, + int64_t pos) +{ + int ret; + QEMUIOVector qiov; + + qemu_iovec_init_external(&qiov, iov, iovcnt); + ret = bdrv_writev_vmstate(opaque, &qiov, pos); + if (ret < 0) { + return ret; + } + + return qiov.size; +} + static int block_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { @@ -408,8 +562,9 @@ static const QEMUFileOps bdrv_read_ops = { }; static const QEMUFileOps bdrv_write_ops = { - .put_buffer = block_put_buffer, - .close = bdrv_fclose + .put_buffer = block_put_buffer, + .writev_buffer = block_writev_buffer, + .close = bdrv_fclose }; static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) @@ -427,8 +582,6 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) f->opaque = opaque; f->ops = ops; - f->is_write = 0; - return f; } @@ -444,23 +597,39 @@ static void qemu_file_set_error(QEMUFile *f, int ret) } } -/** Flushes QEMUFile buffer +static inline bool qemu_file_is_writable(QEMUFile *f) +{ + return f->ops->writev_buffer || f->ops->put_buffer; +} + +/** + * Flushes QEMUFile buffer * + * If there is writev_buffer QEMUFileOps it uses it otherwise uses + * put_buffer ops. */ static void qemu_fflush(QEMUFile *f) { - int ret = 0; + ssize_t ret = 0; - if (!f->ops->put_buffer) { + if (!qemu_file_is_writable(f)) { return; } - if (f->is_write && f->buf_index > 0) { - ret = f->ops->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index); - if (ret >= 0) { - f->buf_offset += f->buf_index; + + if (f->ops->writev_buffer) { + if (f->iovcnt > 0) { + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); } - f->buf_index = 0; + } else { + if (f->buf_index > 0) { + ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index); + } + } + if (ret >= 0) { + f->pos += ret; } + f->buf_index = 0; + f->iovcnt = 0; if (ret < 0) { qemu_file_set_error(f, ret); } @@ -471,11 +640,7 @@ static void qemu_fill_buffer(QEMUFile *f) int len; int pending; - if (!f->ops->get_buffer) - return; - - if (f->is_write) - abort(); + assert(!qemu_file_is_writable(f)); pending = f->buf_size - f->buf_index; if (pending > 0) { @@ -484,11 +649,11 @@ static void qemu_fill_buffer(QEMUFile *f) f->buf_index = 0; f->buf_size = pending; - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->buf_offset, + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, IO_BUF_SIZE - pending); if (len > 0) { f->buf_size += len; - f->buf_offset += len; + f->pos += len; } else if (len == 0) { qemu_file_set_error(f, -EIO); } else if (len != -EAGAIN) @@ -533,18 +698,43 @@ int qemu_fclose(QEMUFile *f) return ret; } -void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size) { - int l; + /* check for adjacent buffer and coalesce them */ + if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + + f->iov[f->iovcnt - 1].iov_len) { + f->iov[f->iovcnt - 1].iov_len += size; + } else { + f->iov[f->iovcnt].iov_base = (uint8_t *)buf; + f->iov[f->iovcnt++].iov_len = size; + } + + if (f->iovcnt >= MAX_IOV_SIZE) { + qemu_fflush(f); + } +} + +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size) +{ + if (!f->ops->writev_buffer) { + qemu_put_buffer(f, buf, size); + return; + } if (f->last_error) { return; } - if (f->is_write == 0 && f->buf_index > 0) { - fprintf(stderr, - "Attempted to write to buffer while read buffer is not empty\n"); - abort(); + f->bytes_xfer += size; + add_to_iovec(f, buf, size); +} + +void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) +{ + int l; + + if (f->last_error) { + return; } while (size > 0) { @@ -552,16 +742,19 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) if (l > size) l = size; memcpy(f->buf + f->buf_index, buf, l); - f->is_write = 1; + f->bytes_xfer += size; + if (f->ops->writev_buffer) { + add_to_iovec(f, f->buf + f->buf_index, l); + } f->buf_index += l; - buf += l; - size -= l; - if (f->buf_index >= IO_BUF_SIZE) { + if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); - if (qemu_file_get_error(f)) { - break; - } } + if (qemu_file_get_error(f)) { + break; + } + buf += l; + size -= l; } } @@ -571,15 +764,13 @@ void qemu_put_byte(QEMUFile *f, int v) return; } - if (f->is_write == 0 && f->buf_index > 0) { - fprintf(stderr, - "Attempted to write to buffer while read buffer is not empty\n"); - abort(); + f->buf[f->buf_index] = v; + f->bytes_xfer++; + if (f->ops->writev_buffer) { + add_to_iovec(f, f->buf + f->buf_index, 1); } - - f->buf[f->buf_index++] = v; - f->is_write = 1; - if (f->buf_index >= IO_BUF_SIZE) { + f->buf_index++; + if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); } } @@ -596,9 +787,7 @@ static int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) int pending; int index; - if (f->is_write) { - abort(); - } + assert(!qemu_file_is_writable(f)); index = f->buf_index + offset; pending = f->buf_size - index; @@ -643,9 +832,7 @@ static int qemu_peek_byte(QEMUFile *f, int offset) { int index = f->buf_index + offset; - if (f->is_write) { - abort(); - } + assert(!qemu_file_is_writable(f)); if (index >= f->buf_size) { qemu_fill_buffer(f); @@ -668,38 +855,34 @@ int qemu_get_byte(QEMUFile *f) int64_t qemu_ftell(QEMUFile *f) { - /* buf_offset excludes buffer for writing but includes it for reading */ - if (f->is_write) { - return f->buf_offset + f->buf_index; - } else { - return f->buf_offset - f->buf_size + f->buf_index; - } + qemu_fflush(f); + return f->pos; } int qemu_file_rate_limit(QEMUFile *f) { - if (f->ops->rate_limit) - return f->ops->rate_limit(f->opaque); - + if (qemu_file_get_error(f)) { + return 1; + } + if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { + return 1; + } return 0; } int64_t qemu_file_get_rate_limit(QEMUFile *f) { - if (f->ops->get_rate_limit) - return f->ops->get_rate_limit(f->opaque); - - return 0; + return f->xfer_limit; } -int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate) +void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) { - /* any failed or completed migration keeps its state to allow probing of - * migration data, but has no associated file anymore */ - if (f && f->ops->set_rate_limit) - return f->ops->set_rate_limit(f->opaque, new_rate); + f->xfer_limit = limit; +} - return 0; +void qemu_file_reset_rate_limit(QEMUFile *f) +{ + f->bytes_xfer = 0; } void qemu_put_be16(QEMUFile *f, unsigned int v) @@ -1022,6 +1205,27 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +/* 64 bit unsigned int. See that the received value is the same than the one + in the field */ + +static int get_uint64_equal(QEMUFile *f, void *pv, size_t size) +{ + uint64_t *v = pv; + uint64_t v2; + qemu_get_be64s(f, &v2); + + if (*v == v2) { + return 0; + } + return -EINVAL; +} + +const VMStateInfo vmstate_info_uint64_equal = { + .name = "int64 equal", + .get = get_uint64_equal, + .put = put_uint64, +}; + /* 8 bit int. See that the received value is the same than the one in the field */ @@ -1062,6 +1266,29 @@ const VMStateInfo vmstate_info_uint16_equal = { .put = put_uint16, }; +/* floating point */ + +static int get_float64(QEMUFile *f, void *pv, size_t size) +{ + float64 *v = pv; + + *v = make_float64(qemu_get_be64(f)); + return 0; +} + +static void put_float64(QEMUFile *f, void *pv, size_t size) +{ + uint64_t *v = pv; + + qemu_put_be64(f, float64_val(*v)); +} + +const VMStateInfo vmstate_info_float64 = { + .name = "float64", + .get = get_float64, + .put = put_float64, +}; + /* timers */ static int get_timer(QEMUFile *f, void *pv, size_t size) @@ -1373,13 +1600,6 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, return 0; } -int vmstate_register(DeviceState *dev, int instance_id, - const VMStateDescription *vmsd, void *opaque) -{ - return vmstate_register_with_alias_id(dev, instance_id, vmsd, - opaque, -1, 0); -} - void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd, void *opaque) {