]> git.proxmox.com Git - mirror_qemu.git/commitdiff
Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160126-2' into staging
authorPeter Maydell <peter.maydell@linaro.org>
Tue, 26 Jan 2016 17:25:11 +0000 (17:25 +0000)
committerPeter Maydell <peter.maydell@linaro.org>
Tue, 26 Jan 2016 17:25:11 +0000 (17:25 +0000)
Xen 2016/01/26 with Signed-off-by lines.

# gpg: Signature made Tue 26 Jan 2016 17:20:12 GMT using RSA key ID 70E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"

* remotes/sstabellini/tags/xen-20160126-2:
  xen: make it possible to build without the Xen PV domain builder
  xen: domainbuild: reopen libxenctrl interface after forking for domain watcher.
  xen: Use stable library interfaces when they are available.
  xen: Switch uses of xc_map_foreign_{pages,bulk} to use libxenforeignmemory API.
  xen: Switch uses of xc_map_foreign_range into xc_map_foreign_pages
  xen: Switch to libxengnttab interface for compat shims.
  xen: Switch to libxenevtchn interface for compat shims.
  xen_console: correctly cleanup primary console on teardown.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13 files changed:
cpus.c
fsdev/virtfs-proxy-helper.texi
nbd/server.c
numa.c
qapi-schema.json
qemu-char.c
qemu-doc.texi
qemu-ga.texi
qemu-img.texi
qemu-options.hx
scripts/dump-guest-memory.py
scripts/kvm/kvm_stat
tests/Makefile

diff --git a/cpus.c b/cpus.c
index 3efff6b109057221c63745c802eb3f712839702f..1e97cc4821a4a2ab6ff501c1fb9726b139510b0c 100644 (file)
--- a/cpus.c
+++ b/cpus.c
@@ -986,7 +986,7 @@ static void qemu_wait_io_event_common(CPUState *cpu)
     if (cpu->stop) {
         cpu->stop = false;
         cpu->stopped = true;
-        qemu_cond_signal(&qemu_pause_cond);
+        qemu_cond_broadcast(&qemu_pause_cond);
     }
     flush_queued_work(cpu);
     cpu->thread_kicked = false;
@@ -1396,7 +1396,7 @@ void cpu_stop_current(void)
         current_cpu->stop = false;
         current_cpu->stopped = true;
         cpu_exit(current_cpu);
-        qemu_cond_signal(&qemu_pause_cond);
+        qemu_cond_broadcast(&qemu_pause_cond);
     }
 }
 
index e60e3b946558ab839cd167eb55fa78917d8d0894..9a25d7ecf4f7c4ea413528db2d23d1b18c20fa33 100644 (file)
@@ -1,6 +1,6 @@
 @example
 @c man begin SYNOPSIS
-usage: virtfs-proxy-helper options
+@command{virtfs-proxy-helper} @var{options}
 @c man end
 @end example
 
index 2265cb0680f0ce6bde148786d2721baf9b444959..256feafcece51b3e89f633da715473af4b7f0968 100644 (file)
@@ -671,7 +671,9 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
      * that BDRV_O_INACTIVE is cleared and the image is ready for write
      * access since the export could be available before migration handover.
      */
+    aio_context_acquire(exp->ctx);
     blk_invalidate_cache(blk, NULL);
+    aio_context_release(exp->ctx);
     return exp;
 
 fail:
diff --git a/numa.c b/numa.c
index 425ef8dc214775d07c7decf9e177fcf7e5b54789..23a5d830243ce1415c11c9158f1722134f13428b 100644 (file)
--- a/numa.c
+++ b/numa.c
@@ -418,12 +418,15 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
         Error *err = NULL;
         memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
                                          mem_path, &err);
-
-        /* Legacy behavior: if allocation failed, fall back to
-         * regular RAM allocation.
-         */
         if (err) {
             error_report_err(err);
+            if (mem_prealloc) {
+                exit(1);
+            }
+
+            /* Legacy behavior: if allocation failed, fall back to
+             * regular RAM allocation.
+             */
             memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
         }
 #else
index b3038b215ae7074479f07f27a354c98827f03b0b..8d04897922ec6dd8f34dd9425fdcb75847b843da 100644 (file)
 #
 # @addr: socket address to listen on (server=true)
 #        or connect to (server=false)
+# @tls-creds: #optional the ID of the TLS credentials object (since 2.6)
 # @server: #optional create server socket (default: true)
 # @wait: #optional wait for incoming connection on server
 #        sockets (default: false).
 # Since: 1.4
 ##
 { 'struct': 'ChardevSocket', 'data': { 'addr'       : 'SocketAddress',
+                                     '*tls-creds'  : 'str',
                                      '*server'    : 'bool',
                                      '*wait'      : 'bool',
                                      '*nodelay'   : 'bool',
index e133f4fc35d7ebe5ecb8848e1f2f6046646c4088..ca53e8c376ef56856dcd5a702363c910219aceea 100644 (file)
@@ -33,6 +33,9 @@
 #include "qapi/qmp-output-visitor.h"
 #include "qapi-visit.h"
 #include "qemu/base64.h"
+#include "io/channel-socket.h"
+#include "io/channel-file.h"
+#include "io/channel-tls.h"
 
 #include <unistd.h>
 #include <fcntl.h>
 
 #define READ_BUF_LEN 4096
 #define READ_RETRIES 10
-#define CHR_MAX_FILENAME_SIZE 256
 #define TCP_MAX_FDS 16
 
 /***********************************************************/
 /* Socket address helpers */
 
-static int SocketAddress_to_str(char *dest, int max_len,
-                                const char *prefix, SocketAddress *addr,
-                                bool is_listen, bool is_telnet)
+static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr,
+                                  bool is_listen, bool is_telnet)
 {
     switch (addr->type) {
     case SOCKET_ADDRESS_KIND_INET:
-        return snprintf(dest, max_len, "%s%s:%s:%s%s", prefix,
-                        is_telnet ? "telnet" : "tcp", addr->u.inet->host,
-                        addr->u.inet->port, is_listen ? ",server" : "");
+        return g_strdup_printf("%s%s:%s:%s%s", prefix,
+                               is_telnet ? "telnet" : "tcp", addr->u.inet->host,
+                               addr->u.inet->port, is_listen ? ",server" : "");
         break;
     case SOCKET_ADDRESS_KIND_UNIX:
-        return snprintf(dest, max_len, "%sunix:%s%s", prefix,
-                        addr->u.q_unix->path, is_listen ? ",server" : "");
+        return g_strdup_printf("%sunix:%s%s", prefix,
+                               addr->u.q_unix->path,
+                               is_listen ? ",server" : "");
         break;
     case SOCKET_ADDRESS_KIND_FD:
-        return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->u.fd->str,
-                        is_listen ? ",server" : "");
+        return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd->str,
+                               is_listen ? ",server" : "");
         break;
     default:
         abort();
     }
 }
 
-static int sockaddr_to_str(char *dest, int max_len,
-                           struct sockaddr_storage *ss, socklen_t ss_len,
-                           struct sockaddr_storage *ps, socklen_t ps_len,
-                           bool is_listen, bool is_telnet)
+static char *sockaddr_to_str(struct sockaddr_storage *ss, socklen_t ss_len,
+                             struct sockaddr_storage *ps, socklen_t ps_len,
+                             bool is_listen, bool is_telnet)
 {
     char shost[NI_MAXHOST], sserv[NI_MAXSERV];
     char phost[NI_MAXHOST], pserv[NI_MAXSERV];
@@ -129,9 +130,9 @@ static int sockaddr_to_str(char *dest, int max_len,
     switch (ss->ss_family) {
 #ifndef _WIN32
     case AF_UNIX:
-        return snprintf(dest, max_len, "unix:%s%s",
-                        ((struct sockaddr_un *)(ss))->sun_path,
-                        is_listen ? ",server" : "");
+        return g_strdup_printf("unix:%s%s",
+                               ((struct sockaddr_un *)(ss))->sun_path,
+                               is_listen ? ",server" : "");
 #endif
     case AF_INET6:
         left  = "[";
@@ -142,14 +143,14 @@ static int sockaddr_to_str(char *dest, int max_len,
                     sserv, sizeof(sserv), NI_NUMERICHOST | NI_NUMERICSERV);
         getnameinfo((struct sockaddr *) ps, ps_len, phost, sizeof(phost),
                     pserv, sizeof(pserv), NI_NUMERICHOST | NI_NUMERICSERV);
-        return snprintf(dest, max_len, "%s:%s%s%s:%s%s <-> %s%s%s:%s",
-                        is_telnet ? "telnet" : "tcp",
-                        left, shost, right, sserv,
-                        is_listen ? ",server" : "",
-                        left, phost, right, pserv);
+        return g_strdup_printf("%s:%s%s%s:%s%s <-> %s%s%s:%s",
+                               is_telnet ? "telnet" : "tcp",
+                               left, shost, right, sserv,
+                               is_listen ? ",server" : "",
+                               left, phost, right, pserv);
 
     default:
-        return snprintf(dest, max_len, "unknown");
+        return g_strdup_printf("unknown");
     }
 }
 
@@ -768,7 +769,7 @@ typedef struct IOWatchPoll
 {
     GSource parent;
 
-    GIOChannel *channel;
+    QIOChannel *ioc;
     GSource *src;
 
     IOCanReadHandler *fd_can_read;
@@ -791,8 +792,8 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
     }
 
     if (now_active) {
-        iwp->src = g_io_create_watch(iwp->channel,
-                                     G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
+        iwp->src = qio_channel_create_watch(
+            iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
         g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
         g_source_attach(iwp->src, NULL);
     } else {
@@ -838,9 +839,9 @@ static GSourceFuncs io_watch_poll_funcs = {
 };
 
 /* Can only be used for read */
-static guint io_add_watch_poll(GIOChannel *channel,
+static guint io_add_watch_poll(QIOChannel *ioc,
                                IOCanReadHandler *fd_can_read,
-                               GIOFunc fd_read,
+                               QIOChannelFunc fd_read,
                                gpointer user_data)
 {
     IOWatchPoll *iwp;
@@ -849,7 +850,7 @@ static guint io_add_watch_poll(GIOChannel *channel,
     iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll));
     iwp->fd_can_read = fd_can_read;
     iwp->opaque = user_data;
-    iwp->channel = channel;
+    iwp->ioc = ioc;
     iwp->fd_read = (GSourceFunc) fd_read;
     iwp->src = NULL;
 
@@ -885,79 +886,50 @@ static void remove_fd_in_watch(CharDriverState *chr)
     }
 }
 
-#ifndef _WIN32
-static GIOChannel *io_channel_from_fd(int fd)
-{
-    GIOChannel *chan;
-
-    if (fd == -1) {
-        return NULL;
-    }
-
-    chan = g_io_channel_unix_new(fd);
-
-    g_io_channel_set_encoding(chan, NULL, NULL);
-    g_io_channel_set_buffered(chan, FALSE);
-
-    return chan;
-}
-#endif
 
-static GIOChannel *io_channel_from_socket(int fd)
+static int io_channel_send_full(QIOChannel *ioc,
+                                const void *buf, size_t len,
+                                int *fds, size_t nfds)
 {
-    GIOChannel *chan;
+    size_t offset = 0;
 
-    if (fd == -1) {
-        return NULL;
-    }
+    while (offset < len) {
+        ssize_t ret = 0;
+        struct iovec iov = { .iov_base = (char *)buf + offset,
+                             .iov_len = len - offset };
+
+        ret = qio_channel_writev_full(
+            ioc, &iov, 1,
+            fds, nfds, NULL);
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            errno = EAGAIN;
+            return -1;
+        } else if (ret < 0) {
+            if (offset) {
+                return offset;
+            }
 
-#ifdef _WIN32
-    chan = g_io_channel_win32_new_socket(fd);
-#else
-    chan = g_io_channel_unix_new(fd);
-#endif
+            errno = EINVAL;
+            return -1;
+        }
 
-    g_io_channel_set_encoding(chan, NULL, NULL);
-    g_io_channel_set_buffered(chan, FALSE);
+        offset += ret;
+    }
 
-    return chan;
+    return offset;
 }
 
-static int io_channel_send(GIOChannel *fd, const void *buf, size_t len)
-{
-    size_t offset = 0;
-    GIOStatus status = G_IO_STATUS_NORMAL;
 
-    while (offset < len && status == G_IO_STATUS_NORMAL) {
-        gsize bytes_written = 0;
-
-        status = g_io_channel_write_chars(fd, buf + offset, len - offset,
-                                          &bytes_written, NULL);
-        offset += bytes_written;
-    }
-
-    if (offset > 0) {
-        return offset;
-    }
-    switch (status) {
-    case G_IO_STATUS_NORMAL:
-        g_assert(len == 0);
-        return 0;
-    case G_IO_STATUS_AGAIN:
-        errno = EAGAIN;
-        return -1;
-    default:
-        break;
-    }
-    errno = EINVAL;
-    return -1;
+#ifndef _WIN32
+static int io_channel_send(QIOChannel *ioc, const void *buf, size_t len)
+{
+    return io_channel_send_full(ioc, buf, len, NULL, 0);
 }
 
-#ifndef _WIN32
 
 typedef struct FDCharDriver {
     CharDriverState *chr;
-    GIOChannel *fd_in, *fd_out;
+    QIOChannel *ioc_in, *ioc_out;
     int max_size;
 } FDCharDriver;
 
@@ -966,17 +938,16 @@ static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     FDCharDriver *s = chr->opaque;
     
-    return io_channel_send(s->fd_out, buf, len);
+    return io_channel_send(s->ioc_out, buf, len);
 }
 
-static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
     CharDriverState *chr = opaque;
     FDCharDriver *s = chr->opaque;
     int len;
     uint8_t buf[READ_BUF_LEN];
-    GIOStatus status;
-    gsize bytes_read;
+    ssize_t ret;
 
     len = sizeof(buf);
     if (len > s->max_size) {
@@ -986,15 +957,15 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
         return TRUE;
     }
 
-    status = g_io_channel_read_chars(chan, (gchar *)buf,
-                                     len, &bytes_read, NULL);
-    if (status == G_IO_STATUS_EOF) {
+    ret = qio_channel_read(
+        chan, (gchar *)buf, len, NULL);
+    if (ret == 0) {
         remove_fd_in_watch(chr);
         qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
         return FALSE;
     }
-    if (status == G_IO_STATUS_NORMAL) {
-        qemu_chr_be_write(chr, buf, bytes_read);
+    if (ret > 0) {
+        qemu_chr_be_write(chr, buf, ret);
     }
 
     return TRUE;
@@ -1012,7 +983,7 @@ static int fd_chr_read_poll(void *opaque)
 static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond)
 {
     FDCharDriver *s = chr->opaque;
-    return g_io_create_watch(s->fd_out, cond);
+    return qio_channel_create_watch(s->ioc_out, cond);
 }
 
 static void fd_chr_update_read_handler(CharDriverState *chr)
@@ -1020,8 +991,9 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     remove_fd_in_watch(chr);
-    if (s->fd_in) {
-        chr->fd_in_tag = io_add_watch_poll(s->fd_in, fd_chr_read_poll,
+    if (s->ioc_in) {
+        chr->fd_in_tag = io_add_watch_poll(s->ioc_in,
+                                           fd_chr_read_poll,
                                            fd_chr_read, chr);
     }
 }
@@ -1031,11 +1003,11 @@ static void fd_chr_close(struct CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     remove_fd_in_watch(chr);
-    if (s->fd_in) {
-        g_io_channel_unref(s->fd_in);
+    if (s->ioc_in) {
+        object_unref(OBJECT(s->ioc_in));
     }
-    if (s->fd_out) {
-        g_io_channel_unref(s->fd_out);
+    if (s->ioc_out) {
+        object_unref(OBJECT(s->ioc_out));
     }
 
     g_free(s);
@@ -1054,8 +1026,8 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out,
         return NULL;
     }
     s = g_new0(FDCharDriver, 1);
-    s->fd_in = io_channel_from_fd(fd_in);
-    s->fd_out = io_channel_from_fd(fd_out);
+    s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in));
+    s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out));
     qemu_set_nonblock(fd_out);
     s->chr = chr;
     chr->opaque = s;
@@ -1074,15 +1046,18 @@ static CharDriverState *qemu_chr_open_pipe(const char *id,
 {
     ChardevHostdev *opts = backend->u.pipe;
     int fd_in, fd_out;
-    char filename_in[CHR_MAX_FILENAME_SIZE];
-    char filename_out[CHR_MAX_FILENAME_SIZE];
+    char *filename_in;
+    char *filename_out;
     const char *filename = opts->device;
     ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe);
 
-    snprintf(filename_in, CHR_MAX_FILENAME_SIZE, "%s.in", filename);
-    snprintf(filename_out, CHR_MAX_FILENAME_SIZE, "%s.out", filename);
+
+    filename_in = g_strdup_printf("%s.in", filename);
+    filename_out = g_strdup_printf("%s.out", filename);
     TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY));
     TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY));
+    g_free(filename_in);
+    g_free(filename_out);
     if (fd_in < 0 || fd_out < 0) {
        if (fd_in >= 0)
            close(fd_in);
@@ -1195,7 +1170,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id,
 #define HAVE_CHARDEV_PTY 1
 
 typedef struct {
-    GIOChannel *fd;
+    QIOChannel *ioc;
     int read_bytes;
 
     /* Protected by the CharDriverState chr_write_lock.  */
@@ -1246,8 +1221,9 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr)
     PtyCharDriver *s = chr->opaque;
     GPollFD pfd;
     int rc;
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc);
 
-    pfd.fd = g_io_channel_unix_get_fd(s->fd);
+    pfd.fd = fioc->fd;
     pfd.events = G_IO_OUT;
     pfd.revents = 0;
     do {
@@ -1281,7 +1257,7 @@ static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
             return 0;
         }
     }
-    return io_channel_send(s->fd, buf, len);
+    return io_channel_send(s->ioc, buf, len);
 }
 
 static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond)
@@ -1290,7 +1266,7 @@ static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond)
     if (!s->connected) {
         return NULL;
     }
-    return g_io_create_watch(s->fd, cond);
+    return qio_channel_create_watch(s->ioc, cond);
 }
 
 static int pty_chr_read_poll(void *opaque)
@@ -1302,13 +1278,13 @@ static int pty_chr_read_poll(void *opaque)
     return s->read_bytes;
 }
 
-static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
     CharDriverState *chr = opaque;
     PtyCharDriver *s = chr->opaque;
-    gsize size, len;
+    gsize len;
     uint8_t buf[READ_BUF_LEN];
-    GIOStatus status;
+    ssize_t ret;
 
     len = sizeof(buf);
     if (len > s->read_bytes)
@@ -1316,13 +1292,13 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     if (len == 0) {
         return TRUE;
     }
-    status = g_io_channel_read_chars(s->fd, (gchar *)buf, len, &size, NULL);
-    if (status != G_IO_STATUS_NORMAL) {
+    ret = qio_channel_read(s->ioc, (char *)buf, len, NULL);
+    if (ret <= 0) {
         pty_chr_state(chr, 0);
         return FALSE;
     } else {
         pty_chr_state(chr, 1);
-        qemu_chr_be_write(chr, buf, size);
+        qemu_chr_be_write(chr, buf, ret);
     }
     return TRUE;
 }
@@ -1364,7 +1340,8 @@ static void pty_chr_state(CharDriverState *chr, int connected)
             s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
         }
         if (!chr->fd_in_tag) {
-            chr->fd_in_tag = io_add_watch_poll(s->fd, pty_chr_read_poll,
+            chr->fd_in_tag = io_add_watch_poll(s->ioc,
+                                               pty_chr_read_poll,
                                                pty_chr_read, chr);
         }
     }
@@ -1373,13 +1350,10 @@ static void pty_chr_state(CharDriverState *chr, int connected)
 static void pty_chr_close(struct CharDriverState *chr)
 {
     PtyCharDriver *s = chr->opaque;
-    int fd;
 
     qemu_mutex_lock(&chr->chr_write_lock);
     pty_chr_state(chr, 0);
-    fd = g_io_channel_unix_get_fd(s->fd);
-    g_io_channel_unref(s->fd);
-    close(fd);
+    object_unref(OBJECT(s->ioc));
     if (s->timer_tag) {
         g_source_remove(s->timer_tag);
         s->timer_tag = 0;
@@ -1430,7 +1404,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id,
     chr->chr_add_watch = pty_chr_add_watch;
     chr->explicit_be_open = true;
 
-    s->fd = io_channel_from_fd(master_fd);
+    s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd));
     s->timer_tag = 0;
 
     return chr;
@@ -1554,12 +1528,13 @@ static void tty_serial_init(int fd, int speed,
 static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
 {
     FDCharDriver *s = chr->opaque;
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in);
 
     switch(cmd) {
     case CHR_IOCTL_SERIAL_SET_PARAMS:
         {
             QEMUSerialSetParams *ssp = arg;
-            tty_serial_init(g_io_channel_unix_get_fd(s->fd_in),
+            tty_serial_init(fioc->fd,
                             ssp->speed, ssp->parity,
                             ssp->data_bits, ssp->stop_bits);
         }
@@ -1568,7 +1543,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
         {
             int enable = *(int *)arg;
             if (enable) {
-                tcsendbreak(g_io_channel_unix_get_fd(s->fd_in), 1);
+                tcsendbreak(fioc->fd, 1);
             }
         }
         break;
@@ -1576,7 +1551,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
         {
             int sarg = 0;
             int *targ = (int *)arg;
-            ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &sarg);
+            ioctl(fioc->fd, TIOCMGET, &sarg);
             *targ = 0;
             if (sarg & TIOCM_CTS)
                 *targ |= CHR_TIOCM_CTS;
@@ -1596,7 +1571,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
         {
             int sarg = *(int *)arg;
             int targ = 0;
-            ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &targ);
+            ioctl(fioc->fd, TIOCMGET, &targ);
             targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR
                      | CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS);
             if (sarg & CHR_TIOCM_CTS)
@@ -1611,7 +1586,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
                 targ |= TIOCM_DTR;
             if (sarg & CHR_TIOCM_RTS)
                 targ |= TIOCM_RTS;
-            ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMSET, &targ);
+            ioctl(fioc->fd, TIOCMSET, &targ);
         }
         break;
     default:
@@ -1622,18 +1597,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
 
 static void qemu_chr_close_tty(CharDriverState *chr)
 {
-    FDCharDriver *s = chr->opaque;
-    int fd = -1;
-
-    if (s) {
-        fd = g_io_channel_unix_get_fd(s->fd_in);
-    }
-
     fd_chr_close(chr);
-
-    if (fd >= 0) {
-        close(fd);
-    }
 }
 
 static CharDriverState *qemu_chr_open_tty_fd(int fd,
@@ -1776,18 +1740,19 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd,
         return NULL;
     }
 
-    drv = g_new0(ParallelCharDriver, 1);
-    drv->fd = fd;
-    drv->mode = IEEE1284_MODE_COMPAT;
-
     chr = qemu_chr_alloc(backend, errp);
     if (!chr) {
         return NULL;
     }
+
+    drv = g_new0(ParallelCharDriver, 1);
+    chr->opaque = drv;
     chr->chr_write = null_chr_write;
     chr->chr_ioctl = pp_ioctl;
     chr->chr_close = pp_close;
-    chr->opaque = drv;
+
+    drv->fd = fd;
+    drv->mode = IEEE1284_MODE_COMPAT;
 
     return chr;
 }
@@ -2115,7 +2080,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename,
     OVERLAPPED ov;
     int ret;
     DWORD size;
-    char openname[CHR_MAX_FILENAME_SIZE];
+    char *openname;
 
     s->fpipe = TRUE;
 
@@ -2130,11 +2095,12 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename,
         goto fail;
     }
 
-    snprintf(openname, sizeof(openname), "\\\\.\\pipe\\%s", filename);
+    openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
     s->hcom = CreateNamedPipe(openname, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
                               PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
                               PIPE_WAIT,
                               MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
+    g_free(openname);
     if (s->hcom == INVALID_HANDLE_VALUE) {
         error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
         s->hcom = NULL;
@@ -2454,8 +2420,7 @@ err1:
 /* UDP Net console */
 
 typedef struct {
-    int fd;
-    GIOChannel *chan;
+    QIOChannel *ioc;
     uint8_t buf[READ_BUF_LEN];
     int bufcnt;
     int bufptr;
@@ -2466,17 +2431,9 @@ typedef struct {
 static int udp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     NetCharDriver *s = chr->opaque;
-    gsize bytes_written;
-    GIOStatus status;
-
-    status = g_io_channel_write_chars(s->chan, (const gchar *)buf, len, &bytes_written, NULL);
-    if (status == G_IO_STATUS_EOF) {
-        return 0;
-    } else if (status != G_IO_STATUS_NORMAL) {
-        return -1;
-    }
 
-    return bytes_written;
+    return qio_channel_write(
+        s->ioc, (const char *)buf, len, NULL);
 }
 
 static int udp_chr_read_poll(void *opaque)
@@ -2497,24 +2454,22 @@ static int udp_chr_read_poll(void *opaque)
     return s->max_size;
 }
 
-static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
     CharDriverState *chr = opaque;
     NetCharDriver *s = chr->opaque;
-    gsize bytes_read = 0;
-    GIOStatus status;
+    ssize_t ret;
 
     if (s->max_size == 0) {
         return TRUE;
     }
-    status = g_io_channel_read_chars(s->chan, (gchar *)s->buf, sizeof(s->buf),
-                                     &bytes_read, NULL);
-    s->bufcnt = bytes_read;
-    s->bufptr = s->bufcnt;
-    if (status != G_IO_STATUS_NORMAL) {
+    ret = qio_channel_read(
+        s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
+    if (ret <= 0) {
         remove_fd_in_watch(chr);
         return FALSE;
     }
+    s->bufcnt = ret;
 
     s->bufptr = 0;
     while (s->max_size > 0 && s->bufptr < s->bufcnt) {
@@ -2531,8 +2486,9 @@ static void udp_chr_update_read_handler(CharDriverState *chr)
     NetCharDriver *s = chr->opaque;
 
     remove_fd_in_watch(chr);
-    if (s->chan) {
-        chr->fd_in_tag = io_add_watch_poll(s->chan, udp_chr_read_poll,
+    if (s->ioc) {
+        chr->fd_in_tag = io_add_watch_poll(s->ioc,
+                                           udp_chr_read_poll,
                                            udp_chr_read, chr);
     }
 }
@@ -2542,17 +2498,16 @@ static void udp_chr_close(CharDriverState *chr)
     NetCharDriver *s = chr->opaque;
 
     remove_fd_in_watch(chr);
-    if (s->chan) {
-        g_io_channel_unref(s->chan);
-        closesocket(s->fd);
+    if (s->ioc) {
+        object_unref(OBJECT(s->ioc));
     }
     g_free(s);
     qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 }
 
-static CharDriverState *qemu_chr_open_udp_fd(int fd,
-                                             ChardevCommon *backend,
-                                             Error **errp)
+static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc,
+                                          ChardevCommon *backend,
+                                          Error **errp)
 {
     CharDriverState *chr = NULL;
     NetCharDriver *s = NULL;
@@ -2563,8 +2518,7 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd,
     }
     s = g_new0(NetCharDriver, 1);
 
-    s->fd = fd;
-    s->chan = io_channel_from_socket(s->fd);
+    s->ioc = QIO_CHANNEL(sioc);
     s->bufcnt = 0;
     s->bufptr = 0;
     chr->opaque = s;
@@ -2580,19 +2534,20 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd,
 /* TCP Net console */
 
 typedef struct {
-
-    GIOChannel *chan, *listen_chan;
+    QIOChannel *ioc; /* Client I/O channel */
+    QIOChannelSocket *sioc; /* Client master channel */
+    QIOChannelSocket *listen_ioc;
     guint listen_tag;
-    int fd, listen_fd;
+    QCryptoTLSCreds *tls_creds;
     int connected;
     int max_size;
     int do_telnetopt;
     int do_nodelay;
     int is_unix;
     int *read_msgfds;
-    int read_msgfds_num;
+    size_t read_msgfds_num;
     int *write_msgfds;
-    int write_msgfds_num;
+    size_t write_msgfds_num;
 
     SocketAddress *addr;
     bool is_listen;
@@ -2626,68 +2581,27 @@ static void check_report_connect_error(CharDriverState *chr,
     qemu_chr_socket_restart_timer(chr);
 }
 
-static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque);
-
-#ifndef _WIN32
-static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    TCPCharDriver *s = chr->opaque;
-    struct msghdr msgh;
-    struct iovec iov;
-    int r;
-
-    size_t fd_size = s->write_msgfds_num * sizeof(int);
-    char control[CMSG_SPACE(fd_size)];
-    struct cmsghdr *cmsg;
-
-    memset(&msgh, 0, sizeof(msgh));
-    memset(control, 0, sizeof(control));
-
-    /* set the payload */
-    iov.iov_base = (uint8_t *) buf;
-    iov.iov_len = len;
-
-    msgh.msg_iov = &iov;
-    msgh.msg_iovlen = 1;
-
-    msgh.msg_control = control;
-    msgh.msg_controllen = sizeof(control);
-
-    cmsg = CMSG_FIRSTHDR(&msgh);
-
-    cmsg->cmsg_len = CMSG_LEN(fd_size);
-    cmsg->cmsg_level = SOL_SOCKET;
-    cmsg->cmsg_type = SCM_RIGHTS;
-    memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size);
-
-    do {
-        r = sendmsg(s->fd, &msgh, 0);
-    } while (r < 0 && errno == EINTR);
-
-    /* free the written msgfds, no matter what */
-    if (s->write_msgfds_num) {
-        g_free(s->write_msgfds);
-        s->write_msgfds = 0;
-        s->write_msgfds_num = 0;
-    }
-
-    return r;
-}
-#endif
+static gboolean tcp_chr_accept(QIOChannel *chan,
+                               GIOCondition cond,
+                               void *opaque);
 
 /* Called with chr_write_lock held.  */
 static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     TCPCharDriver *s = chr->opaque;
     if (s->connected) {
-#ifndef _WIN32
-        if (s->is_unix && s->write_msgfds_num) {
-            return unix_send_msgfds(chr, buf, len);
-        } else
-#endif
-        {
-            return io_channel_send(s->chan, buf, len);
+        int ret =  io_channel_send_full(s->ioc, buf, len,
+                                        s->write_msgfds,
+                                        s->write_msgfds_num);
+
+        /* free the written msgfds, no matter what */
+        if (s->write_msgfds_num) {
+            g_free(s->write_msgfds);
+            s->write_msgfds = 0;
+            s->write_msgfds_num = 0;
         }
+
+        return ret;
     } else {
         /* XXX: indicate an error ? */
         return len;
@@ -2783,6 +2697,10 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
 {
     TCPCharDriver *s = chr->opaque;
 
+    if (!qio_channel_has_feature(s->ioc,
+                                 QIO_CHANNEL_FEATURE_FD_PASS)) {
+        return -1;
+    }
     /* clear old pending fd array */
     g_free(s->write_msgfds);
 
@@ -2796,27 +2714,26 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
     return 0;
 }
 
-#ifndef _WIN32
-static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
+static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
 {
     TCPCharDriver *s = chr->opaque;
-    struct cmsghdr *cmsg;
-
-    for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
-        int fd_size, i;
-
-        if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
-            cmsg->cmsg_level != SOL_SOCKET ||
-            cmsg->cmsg_type != SCM_RIGHTS) {
-            continue;
-        }
-
-        fd_size = cmsg->cmsg_len - CMSG_LEN(0);
-
-        if (!fd_size) {
-            continue;
-        }
+    struct iovec iov = { .iov_base = buf, .iov_len = len };
+    int ret;
+    size_t i;
+    int *msgfds = NULL;
+    size_t msgfds_num = 0;
+
+    if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+        ret = qio_channel_readv_full(s->ioc, &iov, 1,
+                                     &msgfds, &msgfds_num,
+                                     NULL);
+    } else {
+        ret = qio_channel_readv_full(s->ioc, &iov, 1,
+                                     NULL, NULL,
+                                     NULL);
+    }
 
+    if (msgfds_num) {
         /* close and clean read_msgfds */
         for (i = 0; i < s->read_msgfds_num; i++) {
             close(s->read_msgfds[i]);
@@ -2826,77 +2743,31 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
             g_free(s->read_msgfds);
         }
 
-        s->read_msgfds_num = fd_size / sizeof(int);
-        s->read_msgfds = g_malloc(fd_size);
-        memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size);
-
-        for (i = 0; i < s->read_msgfds_num; i++) {
-            int fd = s->read_msgfds[i];
-            if (fd < 0) {
-                continue;
-            }
-
-            /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
-            qemu_set_block(fd);
-
-    #ifndef MSG_CMSG_CLOEXEC
-            qemu_set_cloexec(fd);
-    #endif
-        }
+        s->read_msgfds = msgfds;
+        s->read_msgfds_num = msgfds_num;
     }
-}
-
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
-    TCPCharDriver *s = chr->opaque;
-    struct msghdr msg = { NULL, };
-    struct iovec iov[1];
-    union {
-        struct cmsghdr cmsg;
-        char control[CMSG_SPACE(sizeof(int) * TCP_MAX_FDS)];
-    } msg_control;
-    int flags = 0;
-    ssize_t ret;
 
-    iov[0].iov_base = buf;
-    iov[0].iov_len = len;
+    for (i = 0; i < s->read_msgfds_num; i++) {
+        int fd = s->read_msgfds[i];
+        if (fd < 0) {
+            continue;
+        }
 
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 1;
-    msg.msg_control = &msg_control;
-    msg.msg_controllen = sizeof(msg_control);
+        /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
+        qemu_set_block(fd);
 
-#ifdef MSG_CMSG_CLOEXEC
-    flags |= MSG_CMSG_CLOEXEC;
+#ifndef MSG_CMSG_CLOEXEC
+        qemu_set_cloexec(fd);
 #endif
-    do {
-        ret = recvmsg(s->fd, &msg, flags);
-    } while (ret == -1 && errno == EINTR);
-
-    if (ret > 0 && s->is_unix) {
-        unix_process_msgfd(chr, &msg);
     }
 
     return ret;
 }
-#else
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
-    TCPCharDriver *s = chr->opaque;
-    ssize_t ret;
-
-    do {
-        ret = qemu_recv(s->fd, buf, len, 0);
-    } while (ret == -1 && socket_error() == EINTR);
-
-    return ret;
-}
-#endif
 
 static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond)
 {
     TCPCharDriver *s = chr->opaque;
-    return g_io_create_watch(s->chan, cond);
+    return qio_channel_create_watch(s->ioc, cond);
 }
 
 static void tcp_chr_disconnect(CharDriverState *chr)
@@ -2904,24 +2775,25 @@ static void tcp_chr_disconnect(CharDriverState *chr)
     TCPCharDriver *s = chr->opaque;
 
     s->connected = 0;
-    if (s->listen_chan) {
-        s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN,
-                                       tcp_chr_accept, chr);
+    if (s->listen_ioc) {
+        s->listen_tag = qio_channel_add_watch(
+            QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
     }
     remove_fd_in_watch(chr);
-    g_io_channel_unref(s->chan);
-    s->chan = NULL;
-    closesocket(s->fd);
-    s->fd = -1;
-    SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE,
-                         "disconnected:", s->addr, s->is_listen, s->is_telnet);
+    object_unref(OBJECT(s->sioc));
+    s->sioc = NULL;
+    object_unref(OBJECT(s->ioc));
+    s->ioc = NULL;
+    g_free(chr->filename);
+    chr->filename = SocketAddress_to_str("disconnected:", s->addr,
+                                         s->is_listen, s->is_telnet);
     qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
     if (s->reconnect_time) {
         qemu_chr_socket_restart_timer(chr);
     }
 }
 
-static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
     CharDriverState *chr = opaque;
     TCPCharDriver *s = chr->opaque;
@@ -2935,9 +2807,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     if (len > s->max_size)
         len = s->max_size;
     size = tcp_chr_recv(chr, (void *)buf, len);
-    if (size == 0 ||
-        (size < 0 &&
-         socket_error() != EAGAIN && socket_error() != EWOULDBLOCK)) {
+    if (size == 0 || size == -1) {
         /* connection closed */
         tcp_chr_disconnect(chr);
     } else if (size > 0) {
@@ -2985,25 +2855,17 @@ static void tcp_chr_connect(void *opaque)
 {
     CharDriverState *chr = opaque;
     TCPCharDriver *s = chr->opaque;
-    struct sockaddr_storage ss, ps;
-    socklen_t ss_len = sizeof(ss), ps_len = sizeof(ps);
-
-    memset(&ss, 0, ss_len);
-    if (getsockname(s->fd, (struct sockaddr *) &ss, &ss_len) != 0) {
-        snprintf(chr->filename, CHR_MAX_FILENAME_SIZE,
-                 "Error in getsockname: %s\n", strerror(errno));
-    } else if (getpeername(s->fd, (struct sockaddr *) &ps, &ps_len) != 0) {
-        snprintf(chr->filename, CHR_MAX_FILENAME_SIZE,
-                 "Error in getpeername: %s\n", strerror(errno));
-    } else {
-        sockaddr_to_str(chr->filename, CHR_MAX_FILENAME_SIZE,
-                        &ss, ss_len, &ps, ps_len,
-                        s->is_listen, s->is_telnet);
-    }
+
+    g_free(chr->filename);
+    chr->filename = sockaddr_to_str(
+        &s->sioc->localAddr, s->sioc->localAddrLen,
+        &s->sioc->remoteAddr, s->sioc->remoteAddrLen,
+        s->is_listen, s->is_telnet);
 
     s->connected = 1;
-    if (s->chan) {
-        chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll,
+    if (s->ioc) {
+        chr->fd_in_tag = io_add_watch_poll(s->ioc,
+                                           tcp_chr_read_poll,
                                            tcp_chr_read, chr);
     }
     qemu_chr_be_generic_open(chr);
@@ -3014,82 +2876,195 @@ static void tcp_chr_update_read_handler(CharDriverState *chr)
     TCPCharDriver *s = chr->opaque;
 
     remove_fd_in_watch(chr);
-    if (s->chan) {
-        chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll,
+    if (s->ioc) {
+        chr->fd_in_tag = io_add_watch_poll(s->ioc,
+                                           tcp_chr_read_poll,
                                            tcp_chr_read, chr);
     }
 }
 
-#define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c;
-static void tcp_chr_telnet_init(int fd)
+typedef struct {
+    CharDriverState *chr;
+    char buf[12];
+    size_t buflen;
+} TCPCharDriverTelnetInit;
+
+static gboolean tcp_chr_telnet_init_io(QIOChannel *ioc,
+                                       GIOCondition cond G_GNUC_UNUSED,
+                                       gpointer user_data)
 {
-    char buf[3];
-    /* Send the telnet negotion to put telnet in binary, no echo, single char mode */
-    IACSET(buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
-    send(fd, (char *)buf, 3, 0);
-    IACSET(buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
-    send(fd, (char *)buf, 3, 0);
-    IACSET(buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
-    send(fd, (char *)buf, 3, 0);
-    IACSET(buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
-    send(fd, (char *)buf, 3, 0);
+    TCPCharDriverTelnetInit *init = user_data;
+    ssize_t ret;
+
+    ret = qio_channel_write(ioc, init->buf, init->buflen, NULL);
+    if (ret < 0) {
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            ret = 0;
+        } else {
+            tcp_chr_disconnect(init->chr);
+            return FALSE;
+        }
+    }
+    init->buflen -= ret;
+
+    if (init->buflen == 0) {
+        tcp_chr_connect(init->chr);
+        return FALSE;
+    }
+
+    memmove(init->buf, init->buf + ret, init->buflen);
+
+    return TRUE;
 }
 
-static int tcp_chr_add_client(CharDriverState *chr, int fd)
+static void tcp_chr_telnet_init(CharDriverState *chr)
+{
+    TCPCharDriver *s = chr->opaque;
+    TCPCharDriverTelnetInit *init =
+        g_new0(TCPCharDriverTelnetInit, 1);
+    size_t n = 0;
+
+    init->chr = chr;
+    init->buflen = 12;
+
+#define IACSET(x, a, b, c)                      \
+    do {                                        \
+        x[n++] = a;                             \
+        x[n++] = b;                             \
+        x[n++] = c;                             \
+    } while (0)
+
+    /* Prep the telnet negotion to put telnet in binary,
+     * no echo, single char mode */
+    IACSET(init->buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
+    IACSET(init->buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
+    IACSET(init->buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
+    IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
+
+#undef IACSET
+
+    qio_channel_add_watch(
+        s->ioc, G_IO_OUT,
+        tcp_chr_telnet_init_io,
+        init, NULL);
+}
+
+
+static void tcp_chr_tls_handshake(Object *source,
+                                  Error *err,
+                                  gpointer user_data)
 {
+    CharDriverState *chr = user_data;
     TCPCharDriver *s = chr->opaque;
-    if (s->fd != -1)
+
+    if (err) {
+        tcp_chr_disconnect(chr);
+    } else {
+        if (s->do_telnetopt) {
+            tcp_chr_telnet_init(chr);
+        } else {
+            tcp_chr_connect(chr);
+        }
+    }
+}
+
+
+static void tcp_chr_tls_init(CharDriverState *chr)
+{
+    TCPCharDriver *s = chr->opaque;
+    QIOChannelTLS *tioc;
+    Error *err = NULL;
+
+    if (s->is_listen) {
+        tioc = qio_channel_tls_new_server(
+            s->ioc, s->tls_creds,
+            NULL, /* XXX Use an ACL */
+            &err);
+    } else {
+        tioc = qio_channel_tls_new_client(
+            s->ioc, s->tls_creds,
+            s->addr->u.inet->host,
+            &err);
+    }
+    if (tioc == NULL) {
+        error_free(err);
+        tcp_chr_disconnect(chr);
+    }
+    object_unref(OBJECT(s->ioc));
+    s->ioc = QIO_CHANNEL(tioc);
+
+    qio_channel_tls_handshake(tioc,
+                              tcp_chr_tls_handshake,
+                              chr,
+                              NULL);
+}
+
+
+static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc)
+{
+    TCPCharDriver *s = chr->opaque;
+    if (s->ioc != NULL) {
        return -1;
+    }
 
-    qemu_set_nonblock(fd);
-    if (s->do_nodelay)
-        socket_set_nodelay(fd);
-    s->fd = fd;
-    s->chan = io_channel_from_socket(fd);
+    s->ioc = QIO_CHANNEL(sioc);
+    object_ref(OBJECT(sioc));
+    s->sioc = sioc;
+    object_ref(OBJECT(sioc));
+
+    if (s->do_nodelay) {
+        qio_channel_set_delay(s->ioc, false);
+    }
     if (s->listen_tag) {
         g_source_remove(s->listen_tag);
         s->listen_tag = 0;
     }
-    tcp_chr_connect(chr);
+
+    if (s->tls_creds) {
+        tcp_chr_tls_init(chr);
+    } else {
+        if (s->do_telnetopt) {
+            tcp_chr_telnet_init(chr);
+        } else {
+            tcp_chr_connect(chr);
+        }
+    }
 
     return 0;
 }
 
-static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opaque)
+
+static int tcp_chr_add_client(CharDriverState *chr, int fd)
+{
+    int ret;
+    QIOChannelSocket *sioc;
+
+    sioc = qio_channel_socket_new_fd(fd, NULL);
+    if (!sioc) {
+        return -1;
+    }
+    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+    ret = tcp_chr_new_client(chr, sioc);
+    object_unref(OBJECT(sioc));
+    return ret;
+}
+
+static gboolean tcp_chr_accept(QIOChannel *channel,
+                               GIOCondition cond,
+                               void *opaque)
 {
     CharDriverState *chr = opaque;
-    TCPCharDriver *s = chr->opaque;
-    struct sockaddr_in saddr;
-#ifndef _WIN32
-    struct sockaddr_un uaddr;
-#endif
-    struct sockaddr *addr;
-    socklen_t len;
-    int fd;
+    QIOChannelSocket *sioc;
 
-    for(;;) {
-#ifndef _WIN32
-       if (s->is_unix) {
-           len = sizeof(uaddr);
-           addr = (struct sockaddr *)&uaddr;
-       } else
-#endif
-       {
-           len = sizeof(saddr);
-           addr = (struct sockaddr *)&saddr;
-       }
-        fd = qemu_accept(s->listen_fd, addr, &len);
-        if (fd < 0 && errno != EINTR) {
-            s->listen_tag = 0;
-            return FALSE;
-        } else if (fd >= 0) {
-            if (s->do_telnetopt)
-                tcp_chr_telnet_init(fd);
-            break;
-        }
+    sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel),
+                                     NULL);
+    if (!sioc) {
+        return TRUE;
     }
-    if (tcp_chr_add_client(chr, fd) < 0)
-       close(fd);
+
+    tcp_chr_new_client(chr, sioc);
+
+    object_unref(OBJECT(sioc));
 
     return TRUE;
 }
@@ -3104,22 +3079,16 @@ static void tcp_chr_close(CharDriverState *chr)
         s->reconnect_timer = 0;
     }
     qapi_free_SocketAddress(s->addr);
-    if (s->fd >= 0) {
-        remove_fd_in_watch(chr);
-        if (s->chan) {
-            g_io_channel_unref(s->chan);
-        }
-        closesocket(s->fd);
+    remove_fd_in_watch(chr);
+    if (s->ioc) {
+        object_unref(OBJECT(s->ioc));
     }
-    if (s->listen_fd >= 0) {
-        if (s->listen_tag) {
-            g_source_remove(s->listen_tag);
-            s->listen_tag = 0;
-        }
-        if (s->listen_chan) {
-            g_io_channel_unref(s->listen_chan);
-        }
-        closesocket(s->listen_fd);
+    if (s->listen_tag) {
+        g_source_remove(s->listen_tag);
+        s->listen_tag = 0;
+    }
+    if (s->listen_ioc) {
+        object_unref(OBJECT(s->listen_ioc));
     }
     if (s->read_msgfds_num) {
         for (i = 0; i < s->read_msgfds_num; i++) {
@@ -3127,6 +3096,9 @@ static void tcp_chr_close(CharDriverState *chr)
         }
         g_free(s->read_msgfds);
     }
+    if (s->tls_creds) {
+        object_unref(OBJECT(s->tls_creds));
+    }
     if (s->write_msgfds_num) {
         g_free(s->write_msgfds);
     }
@@ -3134,57 +3106,63 @@ static void tcp_chr_close(CharDriverState *chr)
     qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 }
 
-static void qemu_chr_finish_socket_connection(CharDriverState *chr, int fd)
+static void qemu_chr_finish_socket_connection(CharDriverState *chr,
+                                              QIOChannelSocket *sioc)
 {
     TCPCharDriver *s = chr->opaque;
 
     if (s->is_listen) {
-        s->listen_fd = fd;
-        s->listen_chan = io_channel_from_socket(s->listen_fd);
-        s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN,
-                                       tcp_chr_accept, chr);
+        s->listen_ioc = sioc;
+        s->listen_tag = qio_channel_add_watch(
+            QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
     } else {
-        s->connected = 1;
-        s->fd = fd;
-        socket_set_nodelay(fd);
-        s->chan = io_channel_from_socket(s->fd);
-        tcp_chr_connect(chr);
+        tcp_chr_new_client(chr, sioc);
+        object_unref(OBJECT(sioc));
     }
 }
 
-static void qemu_chr_socket_connected(int fd, Error *err, void *opaque)
+static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque)
 {
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(src);
     CharDriverState *chr = opaque;
     TCPCharDriver *s = chr->opaque;
 
-    if (fd < 0) {
+    if (err) {
         check_report_connect_error(chr, err);
+        object_unref(src);
         return;
     }
 
     s->connect_err_reported = false;
-    qemu_chr_finish_socket_connection(chr, fd);
+    qemu_chr_finish_socket_connection(chr, sioc);
 }
 
 static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp)
 {
     TCPCharDriver *s = chr->opaque;
-    int fd;
+    QIOChannelSocket *sioc = qio_channel_socket_new();
 
     if (s->is_listen) {
-        fd = socket_listen(s->addr, errp);
+        if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) {
+            goto fail;
+        }
+        qemu_chr_finish_socket_connection(chr, sioc);
     } else if (s->reconnect_time) {
-        fd = socket_connect(s->addr, errp, qemu_chr_socket_connected, chr);
-        return fd >= 0;
+        qio_channel_socket_connect_async(sioc, s->addr,
+                                         qemu_chr_socket_connected,
+                                         chr, NULL);
     } else {
-        fd = socket_connect(s->addr, errp, NULL, NULL);
-    }
-    if (fd < 0) {
-        return false;
+        if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) {
+            goto fail;
+        }
+        qemu_chr_finish_socket_connection(chr, sioc);
     }
 
-    qemu_chr_finish_socket_connection(chr, fd);
     return true;
+
+ fail:
+    object_unref(OBJECT(sioc));
+    return false;
 }
 
 /*********************************************************/
@@ -3651,6 +3629,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
     const char *path = qemu_opt_get(opts, "path");
     const char *host = qemu_opt_get(opts, "host");
     const char *port = qemu_opt_get(opts, "port");
+    const char *tls_creds = qemu_opt_get(opts, "tls-creds");
     SocketAddress *addr;
 
     if (!path) {
@@ -3662,6 +3641,11 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
             error_setg(errp, "chardev: socket: no port given");
             return;
         }
+    } else {
+        if (tls_creds) {
+            error_setg(errp, "TLS can only be used over TCP socket");
+            return;
+        }
     }
 
     backend->u.socket = g_new0(ChardevSocket, 1);
@@ -3677,6 +3661,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
     backend->u.socket->wait = is_waitconnect;
     backend->u.socket->has_reconnect = true;
     backend->u.socket->reconnect = reconnect;
+    backend->u.socket->tls_creds = g_strdup(tls_creds);
 
     addr = g_new0(SocketAddress, 1);
     if (path) {
@@ -4103,6 +4088,9 @@ QemuOptsList qemu_chardev_opts = {
         },{
             .name = "telnet",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "tls-creds",
+            .type = QEMU_OPT_STRING,
         },{
             .name = "width",
             .type = QEMU_OPT_NUMBER,
@@ -4315,12 +4303,43 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
     }
     s = g_new0(TCPCharDriver, 1);
 
-    s->fd = -1;
-    s->listen_fd = -1;
     s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX;
     s->is_listen = is_listen;
     s->is_telnet = is_telnet;
     s->do_nodelay = do_nodelay;
+    if (sock->tls_creds) {
+        Object *creds;
+        creds = object_resolve_path_component(
+            object_get_objects_root(), sock->tls_creds);
+        if (!creds) {
+            error_setg(errp, "No TLS credentials with id '%s'",
+                       sock->tls_creds);
+            goto error;
+        }
+        s->tls_creds = (QCryptoTLSCreds *)
+            object_dynamic_cast(creds,
+                                TYPE_QCRYPTO_TLS_CREDS);
+        if (!s->tls_creds) {
+            error_setg(errp, "Object with id '%s' is not TLS credentials",
+                       sock->tls_creds);
+            goto error;
+        }
+        object_ref(OBJECT(s->tls_creds));
+        if (is_listen) {
+            if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+                error_setg(errp, "%s",
+                           "Expected TLS credentials for server endpoint");
+                goto error;
+            }
+        } else {
+            if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+                error_setg(errp, "%s",
+                           "Expected TLS credentials for client endpoint");
+                goto error;
+            }
+        }
+    }
+
     qapi_copy_SocketAddress(&s->addr, sock->addr);
 
     chr->opaque = s;
@@ -4335,9 +4354,8 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
     /* be isn't opened until we get a connection */
     chr->explicit_be_open = true;
 
-    chr->filename = g_malloc(CHR_MAX_FILENAME_SIZE);
-    SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, "disconnected:",
-                         addr, is_listen, is_telnet);
+    chr->filename = SocketAddress_to_str("disconnected:",
+                                         addr, is_listen, is_telnet);
 
     if (is_listen) {
         if (is_telnet) {
@@ -4350,19 +4368,25 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
     if (s->reconnect_time) {
         socket_try_connect(chr);
     } else if (!qemu_chr_open_socket_fd(chr, errp)) {
-        g_free(s);
-        qemu_chr_free_common(chr);
-        return NULL;
+        goto error;
     }
 
     if (is_listen && is_waitconnect) {
         fprintf(stderr, "QEMU waiting for connection on: %s\n",
                 chr->filename);
-        tcp_chr_accept(s->listen_chan, G_IO_IN, chr);
-        qemu_set_nonblock(s->listen_fd);
+        tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
+        qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL);
     }
 
     return chr;
+
+ error:
+    if (s->tls_creds) {
+        object_unref(OBJECT(s->tls_creds));
+    }
+    g_free(s);
+    qemu_chr_free_common(chr);
+    return NULL;
 }
 
 static CharDriverState *qmp_chardev_open_udp(const char *id,
@@ -4372,13 +4396,15 @@ static CharDriverState *qmp_chardev_open_udp(const char *id,
 {
     ChardevUdp *udp = backend->u.udp;
     ChardevCommon *common = qapi_ChardevUdp_base(backend->u.udp);
-    int fd;
+    QIOChannelSocket *sioc = qio_channel_socket_new();
 
-    fd = socket_dgram(udp->remote, udp->local, errp);
-    if (fd < 0) {
+    if (qio_channel_socket_dgram_sync(sioc,
+                                      udp->remote, udp->local,
+                                      errp) < 0) {
+        object_unref(OBJECT(sioc));
         return NULL;
     }
-    return qemu_chr_open_udp_fd(fd, common, errp);
+    return qemu_chr_open_udp(sioc, common, errp);
 }
 
 ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
index 7bc388231f9776eec509fb97e954cec2630d3166..ca4d9de15e654b789af71fc8937b344fe6fa4954 100644 (file)
@@ -259,7 +259,7 @@ Linux should boot and give you a prompt.
 
 @example
 @c man begin SYNOPSIS
-usage: qemu-system-i386 [options] [@var{disk_image}]
+@command{qemu-system-i386} [@var{options}] [@var{disk_image}]
 @c man end
 @end example
 
@@ -1406,7 +1406,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}.
 This USB device implements the USB Transport Layer of HCI.  Example
 usage:
 @example
-qemu-system-i386 [...OPTIONS...] -usbdevice bt:hci,vlan=3 -bt device:keyboard,vlan=3
+@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3
 @end example
 @end table
 
@@ -2755,7 +2755,7 @@ qemu-i386 /usr/local/qemu-i386/wine/bin/wine \
 @subsection Command line options
 
 @example
-usage: qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g port] [-B offset] [-R size] program [arguments...]
+@command{qemu-i386} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...]
 @end example
 
 @table @option
@@ -2897,7 +2897,7 @@ qemu-sparc64 /bin/ls
 @subsection Command line options
 
 @example
-usage: qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] program [arguments...]
+@command{qemu-sparc64} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...]
 @end example
 
 @table @option
index 536a9b52418e0d1cda8350b57c2817963af79764..0e53bf6b2c510d6e46adf104a11bc537100fc6a3 100644 (file)
@@ -1,6 +1,6 @@
 @example
 @c man begin SYNOPSIS
-usage: qemu-ga [OPTIONS]
+@command{qemu-ga} [@var{OPTIONS}]
 @c man end
 @end example
 
index 55c6be391d15c60fa88933517b3dbc46a30d63b1..7163a108e26fc85ac0b94971b3b6131ab4707f77 100644 (file)
@@ -1,6 +1,6 @@
 @example
 @c man begin SYNOPSIS
-usage: qemu-img command [command options]
+@command{qemu-img} @var{command} [@var{command} @var{options}]
 @c man end
 @end example
 
index b4763ba226bff82bd74adb0a847efe43cfbe783b..f31a240bed0f4bfdcbd281d14cbdb210c037091f 100644 (file)
@@ -2092,7 +2092,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
     "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
     "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n"
     "         [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n"
-    "         [,logfile=PATH][,logappend=on|off] (tcp)\n"
+    "         [,logfile=PATH][,logappend=on|off][,tls-creds=ID] (tcp)\n"
     "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds]\n"
     "         [,mux=on|off][,logfile=PATH][,logappend=on|off] (unix)\n"
     "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n"
@@ -2172,7 +2172,7 @@ Further options to each backend are described below.
 A void device. This device will not emit any data, and will drop any data it
 receives. The null backend does not take any options.
 
-@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}]
+@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] [,tls-creds=@var{id}]
 
 Create a two-way stream socket, which can be either a TCP or a unix socket. A
 unix socket will be created if @option{path} is specified. Behaviour is
@@ -2190,6 +2190,11 @@ escape sequences.
 the remote end goes away.  qemu will delay this many seconds and then attempt
 to reconnect.  Zero disables reconnecting, and is the default.
 
+@option{tls-creds} requests enablement of the TLS protocol for encryption,
+and specifies the id of the TLS credentials to use for the handshake. The
+credentials must be previously created with the @option{-object tls-creds}
+argument.
+
 TCP and unix socket options are given below:
 
 @table @option
index 08796fff8cf5eec5fdc4c0f55532b36faae33e73..f274bf80fa2e96e8dd5d944b18448d97eeb9de7f 100644 (file)
-# This python script adds a new gdb command, "dump-guest-memory". It
-# should be loaded with "source dump-guest-memory.py" at the (gdb)
-# prompt.
-#
-# Copyright (C) 2013, Red Hat, Inc.
-#
-# Authors:
-#   Laszlo Ersek <lersek@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later. See
-# the COPYING file in the top-level directory.
-#
+"""
+This python script adds a new gdb command, "dump-guest-memory". It
+should be loaded with "source dump-guest-memory.py" at the (gdb)
+prompt.
+
+Copyright (C) 2013, Red Hat, Inc.
+
+Authors:
+   Laszlo Ersek <lersek@redhat.com>
+   Janosch Frank <frankja@linux.vnet.ibm.com>
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+"""
+
+import ctypes
+
+UINTPTR_T = gdb.lookup_type("uintptr_t")
+
+TARGET_PAGE_SIZE = 0x1000
+TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
+
+# Special value for e_phnum. This indicates that the real number of
+# program headers is too large to fit into e_phnum. Instead the real
+# value is in the field sh_info of section 0.
+PN_XNUM = 0xFFFF
+
+EV_CURRENT = 1
+
+ELFCLASS32 = 1
+ELFCLASS64 = 2
+
+ELFDATA2LSB = 1
+ELFDATA2MSB = 2
+
+ET_CORE = 4
+
+PT_LOAD = 1
+PT_NOTE = 4
+
+EM_386 = 3
+EM_PPC = 20
+EM_PPC64 = 21
+EM_S390 = 22
+EM_AARCH = 183
+EM_X86_64 = 62
+
+class ELF(object):
+    """Representation of a ELF file."""
+
+    def __init__(self, arch):
+        self.ehdr = None
+        self.notes = []
+        self.segments = []
+        self.notes_size = 0
+        self.endianess = None
+        self.elfclass = ELFCLASS64
+
+        if arch == 'aarch64-le':
+            self.endianess = ELFDATA2LSB
+            self.elfclass = ELFCLASS64
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_AARCH
+
+        elif arch == 'aarch64-be':
+            self.endianess = ELFDATA2MSB
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_AARCH
+
+        elif arch == 'X86_64':
+            self.endianess = ELFDATA2LSB
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_X86_64
+
+        elif arch == '386':
+            self.endianess = ELFDATA2LSB
+            self.elfclass = ELFCLASS32
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_386
+
+        elif arch == 's390':
+            self.endianess = ELFDATA2MSB
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_S390
+
+        elif arch == 'ppc64-le':
+            self.endianess = ELFDATA2LSB
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_PPC64
+
+        elif arch == 'ppc64-be':
+            self.endianess = ELFDATA2MSB
+            self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+            self.ehdr.e_machine = EM_PPC64
+
+        else:
+            raise gdb.GdbError("No valid arch type specified.\n"
+                               "Currently supported types:\n"
+                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
+                               "ppc64-be, ppc64-le")
+
+        self.add_segment(PT_NOTE, 0, 0)
+
+    def add_note(self, n_name, n_desc, n_type):
+        """Adds a note to the ELF."""
+
+        note = get_arch_note(self.endianess, len(n_name), len(n_desc))
+        note.n_namesz = len(n_name) + 1
+        note.n_descsz = len(n_desc)
+        note.n_name = n_name.encode()
+        note.n_type = n_type
+
+        # Desc needs to be 4 byte aligned (although the 64bit spec
+        # specifies 8 byte). When defining n_desc as uint32 it will be
+        # automatically aligned but we need the memmove to copy the
+        # string into it.
+        ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc))
+
+        self.notes.append(note)
+        self.segments[0].p_filesz += ctypes.sizeof(note)
+        self.segments[0].p_memsz += ctypes.sizeof(note)
+
+    def add_segment(self, p_type, p_paddr, p_size):
+        """Adds a segment to the elf."""
+
+        phdr = get_arch_phdr(self.endianess, self.elfclass)
+        phdr.p_type = p_type
+        phdr.p_paddr = p_paddr
+        phdr.p_filesz = p_size
+        phdr.p_memsz = p_size
+        self.segments.append(phdr)
+        self.ehdr.e_phnum += 1
+
+    def to_file(self, elf_file):
+        """Writes all ELF structures to the the passed file.
+
+        Structure:
+        Ehdr
+        Segment 0:PT_NOTE
+        Segment 1:PT_LOAD
+        Segment N:PT_LOAD
+        Note    0..N
+        Dump contents
+        """
+        elf_file.write(self.ehdr)
+        off = ctypes.sizeof(self.ehdr) + \
+              len(self.segments) * ctypes.sizeof(self.segments[0])
+
+        for phdr in self.segments:
+            phdr.p_offset = off
+            elf_file.write(phdr)
+            off += phdr.p_filesz
+
+        for note in self.notes:
+            elf_file.write(note)
+
+
+def get_arch_note(endianess, len_name, len_desc):
+    """Returns a Note class with the specified endianess."""
+
+    if endianess == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    len_name = len_name + 1
+
+    class Note(superclass):
+        """Represents an ELF note, includes the content."""
+
+        _fields_ = [("n_namesz", ctypes.c_uint32),
+                    ("n_descsz", ctypes.c_uint32),
+                    ("n_type", ctypes.c_uint32),
+                    ("n_name", ctypes.c_char * len_name),
+                    ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
+    return Note()
+
+
+class Ident(ctypes.Structure):
+    """Represents the ELF ident array in the ehdr structure."""
+
+    _fields_ = [('ei_mag0', ctypes.c_ubyte),
+                ('ei_mag1', ctypes.c_ubyte),
+                ('ei_mag2', ctypes.c_ubyte),
+                ('ei_mag3', ctypes.c_ubyte),
+                ('ei_class', ctypes.c_ubyte),
+                ('ei_data', ctypes.c_ubyte),
+                ('ei_version', ctypes.c_ubyte),
+                ('ei_osabi', ctypes.c_ubyte),
+                ('ei_abiversion', ctypes.c_ubyte),
+                ('ei_pad', ctypes.c_ubyte * 7)]
+
+    def __init__(self, endianess, elfclass):
+        self.ei_mag0 = 0x7F
+        self.ei_mag1 = ord('E')
+        self.ei_mag2 = ord('L')
+        self.ei_mag3 = ord('F')
+        self.ei_class = elfclass
+        self.ei_data = endianess
+        self.ei_version = EV_CURRENT
+
+
+def get_arch_ehdr(endianess, elfclass):
+    """Returns a EHDR64 class with the specified endianess."""
+
+    if endianess == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    class EHDR64(superclass):
+        """Represents the 64 bit ELF header struct."""
+
+        _fields_ = [('e_ident', Ident),
+                    ('e_type', ctypes.c_uint16),
+                    ('e_machine', ctypes.c_uint16),
+                    ('e_version', ctypes.c_uint32),
+                    ('e_entry', ctypes.c_uint64),
+                    ('e_phoff', ctypes.c_uint64),
+                    ('e_shoff', ctypes.c_uint64),
+                    ('e_flags', ctypes.c_uint32),
+                    ('e_ehsize', ctypes.c_uint16),
+                    ('e_phentsize', ctypes.c_uint16),
+                    ('e_phnum', ctypes.c_uint16),
+                    ('e_shentsize', ctypes.c_uint16),
+                    ('e_shnum', ctypes.c_uint16),
+                    ('e_shstrndx', ctypes.c_uint16)]
+
+        def __init__(self):
+            super(superclass, self).__init__()
+            self.e_ident = Ident(endianess, elfclass)
+            self.e_type = ET_CORE
+            self.e_version = EV_CURRENT
+            self.e_ehsize = ctypes.sizeof(self)
+            self.e_phoff = ctypes.sizeof(self)
+            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+            self.e_phnum = 0
+
+
+    class EHDR32(superclass):
+        """Represents the 32 bit ELF header struct."""
+
+        _fields_ = [('e_ident', Ident),
+                    ('e_type', ctypes.c_uint16),
+                    ('e_machine', ctypes.c_uint16),
+                    ('e_version', ctypes.c_uint32),
+                    ('e_entry', ctypes.c_uint32),
+                    ('e_phoff', ctypes.c_uint32),
+                    ('e_shoff', ctypes.c_uint32),
+                    ('e_flags', ctypes.c_uint32),
+                    ('e_ehsize', ctypes.c_uint16),
+                    ('e_phentsize', ctypes.c_uint16),
+                    ('e_phnum', ctypes.c_uint16),
+                    ('e_shentsize', ctypes.c_uint16),
+                    ('e_shnum', ctypes.c_uint16),
+                    ('e_shstrndx', ctypes.c_uint16)]
+
+        def __init__(self):
+            super(superclass, self).__init__()
+            self.e_ident = Ident(endianess, elfclass)
+            self.e_type = ET_CORE
+            self.e_version = EV_CURRENT
+            self.e_ehsize = ctypes.sizeof(self)
+            self.e_phoff = ctypes.sizeof(self)
+            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+            self.e_phnum = 0
+
+    # End get_arch_ehdr
+    if elfclass == ELFCLASS64:
+        return EHDR64()
+    else:
+        return EHDR32()
+
+
+def get_arch_phdr(endianess, elfclass):
+    """Returns a 32 or 64 bit PHDR class with the specified endianess."""
+
+    if endianess == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    class PHDR64(superclass):
+        """Represents the 64 bit ELF program header struct."""
+
+        _fields_ = [('p_type', ctypes.c_uint32),
+                    ('p_flags', ctypes.c_uint32),
+                    ('p_offset', ctypes.c_uint64),
+                    ('p_vaddr', ctypes.c_uint64),
+                    ('p_paddr', ctypes.c_uint64),
+                    ('p_filesz', ctypes.c_uint64),
+                    ('p_memsz', ctypes.c_uint64),
+                    ('p_align', ctypes.c_uint64)]
+
+    class PHDR32(superclass):
+        """Represents the 32 bit ELF program header struct."""
+
+        _fields_ = [('p_type', ctypes.c_uint32),
+                    ('p_offset', ctypes.c_uint32),
+                    ('p_vaddr', ctypes.c_uint32),
+                    ('p_paddr', ctypes.c_uint32),
+                    ('p_filesz', ctypes.c_uint32),
+                    ('p_memsz', ctypes.c_uint32),
+                    ('p_flags', ctypes.c_uint32),
+                    ('p_align', ctypes.c_uint32)]
+
+    # End get_arch_phdr
+    if elfclass == ELFCLASS64:
+        return PHDR64()
+    else:
+        return PHDR32()
+
+
+def int128_get64(val):
+    """Returns low 64bit part of Int128 struct."""
+
+    assert val["hi"] == 0
+    return val["lo"]
+
+
+def qlist_foreach(head, field_str):
+    """Generator for qlists."""
+
+    var_p = head["lh_first"]
+    while var_p != 0:
+        var = var_p.dereference()
+        var_p = var[field_str]["le_next"]
+        yield var
+
+
+def qemu_get_ram_block(ram_addr):
+    """Returns the RAMBlock struct to which the given address belongs."""
+
+    ram_blocks = gdb.parse_and_eval("ram_list.blocks")
+
+    for block in qlist_foreach(ram_blocks, "next"):
+        if (ram_addr - block["offset"]) < block["used_length"]:
+            return block
+
+    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
+
+
+def qemu_get_ram_ptr(ram_addr):
+    """Returns qemu vaddr for given guest physical address."""
+
+    block = qemu_get_ram_block(ram_addr)
+    return block["host"] + (ram_addr - block["offset"])
+
+
+def memory_region_get_ram_ptr(memory_region):
+    if memory_region["alias"] != 0:
+        return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+                + memory_region["alias_offset"])
+
+    return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)
+
+
+def get_guest_phys_blocks():
+    """Returns a list of ram blocks.
+
+    Each block entry contains:
+    'target_start': guest block phys start address
+    'target_end':   guest block phys end address
+    'host_addr':    qemu vaddr of the block's start
+    """
+
+    guest_phys_blocks = []
+
+    print("guest RAM blocks:")
+    print("target_start     target_end       host_addr        message "
+          "count")
+    print("---------------- ---------------- ---------------- ------- "
+          "-----")
+
+    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
+    current_map = current_map_p.dereference()
+
+    # Conversion to int is needed for python 3
+    # compatibility. Otherwise range doesn't cast the value itself and
+    # breaks.
+    for cur in range(int(current_map["nr"])):
+        flat_range = (current_map["ranges"] + cur).dereference()
+        memory_region = flat_range["mr"].dereference()
+
+        # we only care about RAM
+        if not memory_region["ram"]:
+            continue
+
+        section_size = int128_get64(flat_range["addr"]["size"])
+        target_start = int128_get64(flat_range["addr"]["start"])
+        target_end = target_start + section_size
+        host_addr = (memory_region_get_ram_ptr(memory_region)
+                     + flat_range["offset_in_region"])
+        predecessor = None
+
+        # find continuity in guest physical address space
+        if len(guest_phys_blocks) > 0:
+            predecessor = guest_phys_blocks[-1]
+            predecessor_size = (predecessor["target_end"] -
+                                predecessor["target_start"])
+
+            # the memory API guarantees monotonically increasing
+            # traversal
+            assert predecessor["target_end"] <= target_start
+
+            # we want continuity in both guest-physical and
+            # host-virtual memory
+            if (predecessor["target_end"] < target_start or
+                predecessor["host_addr"] + predecessor_size != host_addr):
+                predecessor = None
+
+        if predecessor is None:
+            # isolated mapping, add it to the list
+            guest_phys_blocks.append({"target_start": target_start,
+                                      "target_end":   target_end,
+                                      "host_addr":    host_addr})
+            message = "added"
+        else:
+            # expand predecessor until @target_end; predecessor's
+            # start doesn't change
+            predecessor["target_end"] = target_end
+            message = "joined"
+
+        print("%016x %016x %016x %-7s %5u" %
+              (target_start, target_end, host_addr.cast(UINTPTR_T),
+               message, len(guest_phys_blocks)))
+
+    return guest_phys_blocks
+
+
 # The leading docstring doesn't have idiomatic Python formatting. It is
 # printed by gdb's "help" command (the first line is printed in the
 # "help data" summary), and it should match how other help texts look in
 # gdb.
-
-import struct
-
 class DumpGuestMemory(gdb.Command):
     """Extract guest vmcore from qemu process coredump.
 
-The sole argument is FILE, identifying the target file to write the
-guest vmcore to.
+The two required arguments are FILE and ARCH:
+FILE identifies the target file to write the guest vmcore to.
+ARCH specifies the architecture for which the core will be generated.
 
 This GDB command reimplements the dump-guest-memory QMP command in
 python, using the representation of guest memory as captured in the qemu
 coredump. The qemu process that has been dumped must have had the
-command line option "-machine dump-guest-core=on".
+command line option "-machine dump-guest-core=on" which is the default.
 
 For simplicity, the "paging", "begin" and "end" parameters of the QMP
 command are not supported -- no attempt is made to get the guest's
 internal paging structures (ie. paging=false is hard-wired), and guest
 memory is always fully dumped.
 
-Only x86_64 guests are supported.
+Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
+ppc64-le guests are supported.
 
 The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
 not written to the vmcore. Preparing these would require context that is
@@ -47,293 +464,66 @@ deliberately called abort(), or it was dumped in response to a signal at
 a halfway fortunate point, then its coredump should be in reasonable
 shape and this command should mostly work."""
 
-    TARGET_PAGE_SIZE = 0x1000
-    TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
-
-    # Various ELF constants
-    EM_X86_64   = 62        # AMD x86-64 target machine
-    ELFDATA2LSB = 1         # little endian
-    ELFCLASS64  = 2
-    ELFMAG      = "\x7FELF"
-    EV_CURRENT  = 1
-    ET_CORE     = 4
-    PT_LOAD     = 1
-    PT_NOTE     = 4
-
-    # Special value for e_phnum. This indicates that the real number of
-    # program headers is too large to fit into e_phnum. Instead the real
-    # value is in the field sh_info of section 0.
-    PN_XNUM = 0xFFFF
-
-    # Format strings for packing and header size calculation.
-    ELF64_EHDR = ("4s" # e_ident/magic
-                  "B"  # e_ident/class
-                  "B"  # e_ident/data
-                  "B"  # e_ident/version
-                  "B"  # e_ident/osabi
-                  "8s" # e_ident/pad
-                  "H"  # e_type
-                  "H"  # e_machine
-                  "I"  # e_version
-                  "Q"  # e_entry
-                  "Q"  # e_phoff
-                  "Q"  # e_shoff
-                  "I"  # e_flags
-                  "H"  # e_ehsize
-                  "H"  # e_phentsize
-                  "H"  # e_phnum
-                  "H"  # e_shentsize
-                  "H"  # e_shnum
-                  "H"  # e_shstrndx
-                 )
-    ELF64_PHDR = ("I"  # p_type
-                  "I"  # p_flags
-                  "Q"  # p_offset
-                  "Q"  # p_vaddr
-                  "Q"  # p_paddr
-                  "Q"  # p_filesz
-                  "Q"  # p_memsz
-                  "Q"  # p_align
-                 )
-
     def __init__(self):
         super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                               gdb.COMMAND_DATA,
                                               gdb.COMPLETE_FILENAME)
-        self.uintptr_t     = gdb.lookup_type("uintptr_t")
-        self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR)
-        self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR)
-
-    def int128_get64(self, val):
-        assert (val["hi"] == 0)
-        return val["lo"]
-
-    def qlist_foreach(self, head, field_str):
-        var_p = head["lh_first"]
-        while (var_p != 0):
-            var = var_p.dereference()
-            yield var
-            var_p = var[field_str]["le_next"]
-
-    def qemu_get_ram_block(self, ram_addr):
-        ram_blocks = gdb.parse_and_eval("ram_list.blocks")
-        for block in self.qlist_foreach(ram_blocks, "next"):
-            if (ram_addr - block["offset"] < block["used_length"]):
-                return block
-        raise gdb.GdbError("Bad ram offset %x" % ram_addr)
-
-    def qemu_get_ram_ptr(self, ram_addr):
-        block = self.qemu_get_ram_block(ram_addr)
-        return block["host"] + (ram_addr - block["offset"])
-
-    def memory_region_get_ram_ptr(self, mr):
-        if (mr["alias"] != 0):
-            return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) +
-                    mr["alias_offset"])
-        return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK)
-
-    def guest_phys_blocks_init(self):
-        self.guest_phys_blocks = []
-
-    def guest_phys_blocks_append(self):
-        print "guest RAM blocks:"
-        print ("target_start     target_end       host_addr        message "
-               "count")
-        print ("---------------- ---------------- ---------------- ------- "
-               "-----")
-
-        current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
-        current_map = current_map_p.dereference()
-        for cur in range(current_map["nr"]):
-            flat_range   = (current_map["ranges"] + cur).dereference()
-            mr           = flat_range["mr"].dereference()
-
-            # we only care about RAM
-            if (not mr["ram"]):
-                continue
-
-            section_size = self.int128_get64(flat_range["addr"]["size"])
-            target_start = self.int128_get64(flat_range["addr"]["start"])
-            target_end   = target_start + section_size
-            host_addr    = (self.memory_region_get_ram_ptr(mr) +
-                            flat_range["offset_in_region"])
-            predecessor = None
-
-            # find continuity in guest physical address space
-            if (len(self.guest_phys_blocks) > 0):
-                predecessor = self.guest_phys_blocks[-1]
-                predecessor_size = (predecessor["target_end"] -
-                                    predecessor["target_start"])
-
-                # the memory API guarantees monotonically increasing
-                # traversal
-                assert (predecessor["target_end"] <= target_start)
-
-                # we want continuity in both guest-physical and
-                # host-virtual memory
-                if (predecessor["target_end"] < target_start or
-                    predecessor["host_addr"] + predecessor_size != host_addr):
-                    predecessor = None
-
-            if (predecessor is None):
-                # isolated mapping, add it to the list
-                self.guest_phys_blocks.append({"target_start": target_start,
-                                               "target_end"  : target_end,
-                                               "host_addr"   : host_addr})
-                message = "added"
-            else:
-                # expand predecessor until @target_end; predecessor's
-                # start doesn't change
-                predecessor["target_end"] = target_end
-                message = "joined"
-
-            print ("%016x %016x %016x %-7s %5u" %
-                   (target_start, target_end, host_addr.cast(self.uintptr_t),
-                    message, len(self.guest_phys_blocks)))
-
-    def cpu_get_dump_info(self):
-        # We can't synchronize the registers with KVM post-mortem, and
-        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
-        # may not reflect long mode for example. Hence just assume the
-        # most common values. This also means that instruction pointer
-        # etc. will be bogus in the dump, but at least the RAM contents
-        # should be valid.
-        self.dump_info = {"d_machine": self.EM_X86_64,
-                          "d_endian" : self.ELFDATA2LSB,
-                          "d_class"  : self.ELFCLASS64}
-
-    def encode_elf64_ehdr_le(self):
-        return self.elf64_ehdr_le.pack(
-                                 self.ELFMAG,                 # e_ident/magic
-                                 self.dump_info["d_class"],   # e_ident/class
-                                 self.dump_info["d_endian"],  # e_ident/data
-                                 self.EV_CURRENT,             # e_ident/version
-                                 0,                           # e_ident/osabi
-                                 "",                          # e_ident/pad
-                                 self.ET_CORE,                # e_type
-                                 self.dump_info["d_machine"], # e_machine
-                                 self.EV_CURRENT,             # e_version
-                                 0,                           # e_entry
-                                 self.elf64_ehdr_le.size,     # e_phoff
-                                 0,                           # e_shoff
-                                 0,                           # e_flags
-                                 self.elf64_ehdr_le.size,     # e_ehsize
-                                 self.elf64_phdr_le.size,     # e_phentsize
-                                 self.phdr_num,               # e_phnum
-                                 0,                           # e_shentsize
-                                 0,                           # e_shnum
-                                 0                            # e_shstrndx
-                                )
-
-    def encode_elf64_note_le(self):
-        return self.elf64_phdr_le.pack(self.PT_NOTE,         # p_type
-                                       0,                    # p_flags
-                                       (self.memory_offset -
-                                        len(self.note)),     # p_offset
-                                       0,                    # p_vaddr
-                                       0,                    # p_paddr
-                                       len(self.note),       # p_filesz
-                                       len(self.note),       # p_memsz
-                                       0                     # p_align
-                                      )
-
-    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
-        return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type
-                                       0,            # p_flags
-                                       offset,       # p_offset
-                                       0,            # p_vaddr
-                                       start_hwaddr, # p_paddr
-                                       range_size,   # p_filesz
-                                       range_size,   # p_memsz
-                                       0             # p_align
-                                      )
-
-    def note_init(self, name, desc, type):
-        # name must include a trailing NUL
-        namesz = (len(name) + 1 + 3) / 4 * 4
-        descsz = (len(desc)     + 3) / 4 * 4
-        fmt = ("<"   # little endian
-               "I"   # n_namesz
-               "I"   # n_descsz
-               "I"   # n_type
-               "%us" # name
-               "%us" # desc
-               % (namesz, descsz))
-        self.note = struct.pack(fmt,
-                                len(name) + 1, len(desc), type, name, desc)
-
-    def dump_init(self):
-        self.guest_phys_blocks_init()
-        self.guest_phys_blocks_append()
-        self.cpu_get_dump_info()
-        # we have no way to retrieve the VCPU status from KVM
-        # post-mortem
-        self.note_init("NONE", "EMPTY", 0)
-
-        # Account for PT_NOTE.
-        self.phdr_num = 1
-
-        # We should never reach PN_XNUM for paging=false dumps: there's
-        # just a handful of discontiguous ranges after merging.
-        self.phdr_num += len(self.guest_phys_blocks)
-        assert (self.phdr_num < self.PN_XNUM)
-
-        # Calculate the ELF file offset where the memory dump commences:
-        #
-        #   ELF header
-        #   PT_NOTE
-        #   PT_LOAD: 1
-        #   PT_LOAD: 2
-        #   ...
-        #   PT_LOAD: len(self.guest_phys_blocks)
-        #   ELF note
-        #   memory dump
-        self.memory_offset = (self.elf64_ehdr_le.size +
-                              self.elf64_phdr_le.size * self.phdr_num +
-                              len(self.note))
-
-    def dump_begin(self, vmcore):
-        vmcore.write(self.encode_elf64_ehdr_le())
-        vmcore.write(self.encode_elf64_note_le())
-        running = self.memory_offset
+        self.elf = None
+        self.guest_phys_blocks = None
+
+    def dump_init(self, vmcore):
+        """Prepares and writes ELF structures to core file."""
+
+        # Needed to make crash happy, data for more useful notes is
+        # not available in a qemu core.
+        self.elf.add_note("NONE", "EMPTY", 0)
+
+        # We should never reach PN_XNUM for paging=false dumps,
+        # there's just a handful of discontiguous ranges after
+        # merging.
+        # The constant is needed to account for the PT_NOTE segment.
+        phdr_num = len(self.guest_phys_blocks) + 1
+        assert phdr_num < PN_XNUM
+
         for block in self.guest_phys_blocks:
-            range_size = block["target_end"] - block["target_start"]
-            vmcore.write(self.encode_elf64_load_le(running,
-                                                   block["target_start"],
-                                                   range_size))
-            running += range_size
-        vmcore.write(self.note)
+            block_size = block["target_end"] - block["target_start"]
+            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
+
+        self.elf.to_file(vmcore)
 
     def dump_iterate(self, vmcore):
+        """Writes guest core to file."""
+
         qemu_core = gdb.inferiors()[0]
         for block in self.guest_phys_blocks:
-            cur  = block["host_addr"]
+            cur = block["host_addr"]
             left = block["target_end"] - block["target_start"]
-            print ("dumping range at %016x for length %016x" %
-                   (cur.cast(self.uintptr_t), left))
-            while (left > 0):
-                chunk_size = min(self.TARGET_PAGE_SIZE, left)
+            print("dumping range at %016x for length %016x" %
+                  (cur.cast(UINTPTR_T), left))
+
+            while left > 0:
+                chunk_size = min(TARGET_PAGE_SIZE, left)
                 chunk = qemu_core.read_memory(cur, chunk_size)
                 vmcore.write(chunk)
-                cur  += chunk_size
+                cur += chunk_size
                 left -= chunk_size
 
-    def create_vmcore(self, filename):
-        vmcore = open(filename, "wb")
-        self.dump_begin(vmcore)
-        self.dump_iterate(vmcore)
-        vmcore.close()
-
     def invoke(self, args, from_tty):
+        """Handles command invocation from gdb."""
+
         # Unwittingly pressing the Enter key after the command should
         # not dump the same multi-gig coredump to the same file.
         self.dont_repeat()
 
         argv = gdb.string_to_argv(args)
-        if (len(argv) != 1):
-            raise gdb.GdbError("usage: dump-guest-memory FILE")
+        if len(argv) != 2:
+            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
+
+        self.elf = ELF(argv[1])
+        self.guest_phys_blocks = get_guest_phys_blocks()
 
-        self.dump_init()
-        self.create_vmcore(argv[0])
+        with open(argv[0], "wb") as vmcore:
+            self.dump_init(vmcore)
+            self.dump_iterate(vmcore)
 
 DumpGuestMemory()
index 7e5d25612b4ec9f8324144913797ce4083680fac..d43e8f3e85f241b93137306b1187c4d49ee35ccb 100755 (executable)
 # the COPYING file in the top-level directory.
 
 import curses
-import sys, os, time, optparse, ctypes
-from ctypes import *
-
-class DebugfsProvider(object):
-    def __init__(self):
-        self.base = '/sys/kernel/debug/kvm'
-        self._fields = os.listdir(self.base)
-    def fields(self):
-        return self._fields
-    def select(self, fields):
-        self._fields = fields
-    def read(self):
-        def val(key):
-            return int(file(self.base + '/' + key).read())
-        return dict([(key, val(key)) for key in self._fields])
-
-vmx_exit_reasons = {
-    0: 'EXCEPTION_NMI',
-    1: 'EXTERNAL_INTERRUPT',
-    2: 'TRIPLE_FAULT',
-    7: 'PENDING_INTERRUPT',
-    8: 'NMI_WINDOW',
-    9: 'TASK_SWITCH',
-    10: 'CPUID',
-    12: 'HLT',
-    14: 'INVLPG',
-    15: 'RDPMC',
-    16: 'RDTSC',
-    18: 'VMCALL',
-    19: 'VMCLEAR',
-    20: 'VMLAUNCH',
-    21: 'VMPTRLD',
-    22: 'VMPTRST',
-    23: 'VMREAD',
-    24: 'VMRESUME',
-    25: 'VMWRITE',
-    26: 'VMOFF',
-    27: 'VMON',
-    28: 'CR_ACCESS',
-    29: 'DR_ACCESS',
-    30: 'IO_INSTRUCTION',
-    31: 'MSR_READ',
-    32: 'MSR_WRITE',
-    33: 'INVALID_STATE',
-    36: 'MWAIT_INSTRUCTION',
-    39: 'MONITOR_INSTRUCTION',
-    40: 'PAUSE_INSTRUCTION',
-    41: 'MCE_DURING_VMENTRY',
-    43: 'TPR_BELOW_THRESHOLD',
-    44: 'APIC_ACCESS',
-    48: 'EPT_VIOLATION',
-    49: 'EPT_MISCONFIG',
-    54: 'WBINVD',
-    55: 'XSETBV',
-    56: 'APIC_WRITE',
-    58: 'INVPCID',
+import sys
+import os
+import time
+import optparse
+import ctypes
+import fcntl
+import resource
+import struct
+import re
+from collections import defaultdict
+
+VMX_EXIT_REASONS = {
+    'EXCEPTION_NMI':        0,
+    'EXTERNAL_INTERRUPT':   1,
+    'TRIPLE_FAULT':         2,
+    'PENDING_INTERRUPT':    7,
+    'NMI_WINDOW':           8,
+    'TASK_SWITCH':          9,
+    'CPUID':                10,
+    'HLT':                  12,
+    'INVLPG':               14,
+    'RDPMC':                15,
+    'RDTSC':                16,
+    'VMCALL':               18,
+    'VMCLEAR':              19,
+    'VMLAUNCH':             20,
+    'VMPTRLD':              21,
+    'VMPTRST':              22,
+    'VMREAD':               23,
+    'VMRESUME':             24,
+    'VMWRITE':              25,
+    'VMOFF':                26,
+    'VMON':                 27,
+    'CR_ACCESS':            28,
+    'DR_ACCESS':            29,
+    'IO_INSTRUCTION':       30,
+    'MSR_READ':             31,
+    'MSR_WRITE':            32,
+    'INVALID_STATE':        33,
+    'MWAIT_INSTRUCTION':    36,
+    'MONITOR_INSTRUCTION':  39,
+    'PAUSE_INSTRUCTION':    40,
+    'MCE_DURING_VMENTRY':   41,
+    'TPR_BELOW_THRESHOLD':  43,
+    'APIC_ACCESS':          44,
+    'EPT_VIOLATION':        48,
+    'EPT_MISCONFIG':        49,
+    'WBINVD':               54,
+    'XSETBV':               55,
+    'APIC_WRITE':           56,
+    'INVPCID':              58,
 }
 
-svm_exit_reasons = {
-    0x000: 'READ_CR0',
-    0x003: 'READ_CR3',
-    0x004: 'READ_CR4',
-    0x008: 'READ_CR8',
-    0x010: 'WRITE_CR0',
-    0x013: 'WRITE_CR3',
-    0x014: 'WRITE_CR4',
-    0x018: 'WRITE_CR8',
-    0x020: 'READ_DR0',
-    0x021: 'READ_DR1',
-    0x022: 'READ_DR2',
-    0x023: 'READ_DR3',
-    0x024: 'READ_DR4',
-    0x025: 'READ_DR5',
-    0x026: 'READ_DR6',
-    0x027: 'READ_DR7',
-    0x030: 'WRITE_DR0',
-    0x031: 'WRITE_DR1',
-    0x032: 'WRITE_DR2',
-    0x033: 'WRITE_DR3',
-    0x034: 'WRITE_DR4',
-    0x035: 'WRITE_DR5',
-    0x036: 'WRITE_DR6',
-    0x037: 'WRITE_DR7',
-    0x040: 'EXCP_BASE',
-    0x060: 'INTR',
-    0x061: 'NMI',
-    0x062: 'SMI',
-    0x063: 'INIT',
-    0x064: 'VINTR',
-    0x065: 'CR0_SEL_WRITE',
-    0x066: 'IDTR_READ',
-    0x067: 'GDTR_READ',
-    0x068: 'LDTR_READ',
-    0x069: 'TR_READ',
-    0x06a: 'IDTR_WRITE',
-    0x06b: 'GDTR_WRITE',
-    0x06c: 'LDTR_WRITE',
-    0x06d: 'TR_WRITE',
-    0x06e: 'RDTSC',
-    0x06f: 'RDPMC',
-    0x070: 'PUSHF',
-    0x071: 'POPF',
-    0x072: 'CPUID',
-    0x073: 'RSM',
-    0x074: 'IRET',
-    0x075: 'SWINT',
-    0x076: 'INVD',
-    0x077: 'PAUSE',
-    0x078: 'HLT',
-    0x079: 'INVLPG',
-    0x07a: 'INVLPGA',
-    0x07b: 'IOIO',
-    0x07c: 'MSR',
-    0x07d: 'TASK_SWITCH',
-    0x07e: 'FERR_FREEZE',
-    0x07f: 'SHUTDOWN',
-    0x080: 'VMRUN',
-    0x081: 'VMMCALL',
-    0x082: 'VMLOAD',
-    0x083: 'VMSAVE',
-    0x084: 'STGI',
-    0x085: 'CLGI',
-    0x086: 'SKINIT',
-    0x087: 'RDTSCP',
-    0x088: 'ICEBP',
-    0x089: 'WBINVD',
-    0x08a: 'MONITOR',
-    0x08b: 'MWAIT',
-    0x08c: 'MWAIT_COND',
-    0x08d: 'XSETBV',
-    0x400: 'NPF',
+SVM_EXIT_REASONS = {
+    'READ_CR0':       0x000,
+    'READ_CR3':       0x003,
+    'READ_CR4':       0x004,
+    'READ_CR8':       0x008,
+    'WRITE_CR0':      0x010,
+    'WRITE_CR3':      0x013,
+    'WRITE_CR4':      0x014,
+    'WRITE_CR8':      0x018,
+    'READ_DR0':       0x020,
+    'READ_DR1':       0x021,
+    'READ_DR2':       0x022,
+    'READ_DR3':       0x023,
+    'READ_DR4':       0x024,
+    'READ_DR5':       0x025,
+    'READ_DR6':       0x026,
+    'READ_DR7':       0x027,
+    'WRITE_DR0':      0x030,
+    'WRITE_DR1':      0x031,
+    'WRITE_DR2':      0x032,
+    'WRITE_DR3':      0x033,
+    'WRITE_DR4':      0x034,
+    'WRITE_DR5':      0x035,
+    'WRITE_DR6':      0x036,
+    'WRITE_DR7':      0x037,
+    'EXCP_BASE':      0x040,
+    'INTR':           0x060,
+    'NMI':            0x061,
+    'SMI':            0x062,
+    'INIT':           0x063,
+    'VINTR':          0x064,
+    'CR0_SEL_WRITE':  0x065,
+    'IDTR_READ':      0x066,
+    'GDTR_READ':      0x067,
+    'LDTR_READ':      0x068,
+    'TR_READ':        0x069,
+    'IDTR_WRITE':     0x06a,
+    'GDTR_WRITE':     0x06b,
+    'LDTR_WRITE':     0x06c,
+    'TR_WRITE':       0x06d,
+    'RDTSC':          0x06e,
+    'RDPMC':          0x06f,
+    'PUSHF':          0x070,
+    'POPF':           0x071,
+    'CPUID':          0x072,
+    'RSM':            0x073,
+    'IRET':           0x074,
+    'SWINT':          0x075,
+    'INVD':           0x076,
+    'PAUSE':          0x077,
+    'HLT':            0x078,
+    'INVLPG':         0x079,
+    'INVLPGA':        0x07a,
+    'IOIO':           0x07b,
+    'MSR':            0x07c,
+    'TASK_SWITCH':    0x07d,
+    'FERR_FREEZE':    0x07e,
+    'SHUTDOWN':       0x07f,
+    'VMRUN':          0x080,
+    'VMMCALL':        0x081,
+    'VMLOAD':         0x082,
+    'VMSAVE':         0x083,
+    'STGI':           0x084,
+    'CLGI':           0x085,
+    'SKINIT':         0x086,
+    'RDTSCP':         0x087,
+    'ICEBP':          0x088,
+    'WBINVD':         0x089,
+    'MONITOR':        0x08a,
+    'MWAIT':          0x08b,
+    'MWAIT_COND':     0x08c,
+    'XSETBV':         0x08d,
+    'NPF':            0x400,
 }
 
 # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
-aarch64_exit_reasons = {
-    0x00: 'UNKNOWN',
-    0x01: 'WFI',
-    0x03: 'CP15_32',
-    0x04: 'CP15_64',
-    0x05: 'CP14_MR',
-    0x06: 'CP14_LS',
-    0x07: 'FP_ASIMD',
-    0x08: 'CP10_ID',
-    0x0C: 'CP14_64',
-    0x0E: 'ILL_ISS',
-    0x11: 'SVC32',
-    0x12: 'HVC32',
-    0x13: 'SMC32',
-    0x15: 'SVC64',
-    0x16: 'HVC64',
-    0x17: 'SMC64',
-    0x18: 'SYS64',
-    0x20: 'IABT',
-    0x21: 'IABT_HYP',
-    0x22: 'PC_ALIGN',
-    0x24: 'DABT',
-    0x25: 'DABT_HYP',
-    0x26: 'SP_ALIGN',
-    0x28: 'FP_EXC32',
-    0x2C: 'FP_EXC64',
-    0x2F: 'SERROR',
-    0x30: 'BREAKPT',
-    0x31: 'BREAKPT_HYP',
-    0x32: 'SOFTSTP',
-    0x33: 'SOFTSTP_HYP',
-    0x34: 'WATCHPT',
-    0x35: 'WATCHPT_HYP',
-    0x38: 'BKPT32',
-    0x3A: 'VECTOR32',
-    0x3C: 'BRK64',
+AARCH64_EXIT_REASONS = {
+    'UNKNOWN':      0x00,
+    'WFI':          0x01,
+    'CP15_32':      0x03,
+    'CP15_64':      0x04,
+    'CP14_MR':      0x05,
+    'CP14_LS':      0x06,
+    'FP_ASIMD':     0x07,
+    'CP10_ID':      0x08,
+    'CP14_64':      0x0C,
+    'ILL_ISS':      0x0E,
+    'SVC32':        0x11,
+    'HVC32':        0x12,
+    'SMC32':        0x13,
+    'SVC64':        0x15,
+    'HVC64':        0x16,
+    'SMC64':        0x17,
+    'SYS64':        0x18,
+    'IABT':         0x20,
+    'IABT_HYP':     0x21,
+    'PC_ALIGN':     0x22,
+    'DABT':         0x24,
+    'DABT_HYP':     0x25,
+    'SP_ALIGN':     0x26,
+    'FP_EXC32':     0x28,
+    'FP_EXC64':     0x2C,
+    'SERROR':       0x2F,
+    'BREAKPT':      0x30,
+    'BREAKPT_HYP':  0x31,
+    'SOFTSTP':      0x32,
+    'SOFTSTP_HYP':  0x33,
+    'WATCHPT':      0x34,
+    'WATCHPT_HYP':  0x35,
+    'BKPT32':       0x38,
+    'VECTOR32':     0x3A,
+    'BRK64':        0x3C,
 }
 
 # From include/uapi/linux/kvm.h, KVM_EXIT_xxx
-userspace_exit_reasons = {
-     0: 'UNKNOWN',
-     1: 'EXCEPTION',
-     2: 'IO',
-     3: 'HYPERCALL',
-     4: 'DEBUG',
-     5: 'HLT',
-     6: 'MMIO',
-     7: 'IRQ_WINDOW_OPEN',
-     8: 'SHUTDOWN',
-     9: 'FAIL_ENTRY',
-    10: 'INTR',
-    11: 'SET_TPR',
-    12: 'TPR_ACCESS',
-    13: 'S390_SIEIC',
-    14: 'S390_RESET',
-    15: 'DCR',
-    16: 'NMI',
-    17: 'INTERNAL_ERROR',
-    18: 'OSI',
-    19: 'PAPR_HCALL',
-    20: 'S390_UCONTROL',
-    21: 'WATCHDOG',
-    22: 'S390_TSCH',
-    23: 'EPR',
-    24: 'SYSTEM_EVENT',
+USERSPACE_EXIT_REASONS = {
+    'UNKNOWN':          0,
+    'EXCEPTION':        1,
+    'IO':               2,
+    'HYPERCALL':        3,
+    'DEBUG':            4,
+    'HLT':              5,
+    'MMIO':             6,
+    'IRQ_WINDOW_OPEN':  7,
+    'SHUTDOWN':         8,
+    'FAIL_ENTRY':       9,
+    'INTR':             10,
+    'SET_TPR':          11,
+    'TPR_ACCESS':       12,
+    'S390_SIEIC':       13,
+    'S390_RESET':       14,
+    'DCR':              15,
+    'NMI':              16,
+    'INTERNAL_ERROR':   17,
+    'OSI':              18,
+    'PAPR_HCALL':       19,
+    'S390_UCONTROL':    20,
+    'WATCHDOG':         21,
+    'S390_TSCH':        22,
+    'EPR':              23,
+    'SYSTEM_EVENT':     24,
 }
 
-x86_exit_reasons = {
-    'vmx': vmx_exit_reasons,
-    'svm': svm_exit_reasons,
+IOCTL_NUMBERS = {
+    'SET_FILTER':  0x40082406,
+    'ENABLE':      0x00002400,
+    'DISABLE':     0x00002401,
+    'RESET':       0x00002403,
 }
 
-sc_perf_evt_open = None
-exit_reasons = None
+class Arch(object):
+    """Class that encapsulates global architecture specific data like
+    syscall and ioctl numbers.
+
+    """
+    @staticmethod
+    def get_arch():
+        machine = os.uname()[4]
+
+        if machine.startswith('ppc'):
+            return ArchPPC()
+        elif machine.startswith('aarch64'):
+            return ArchA64()
+        elif machine.startswith('s390'):
+            return ArchS390()
+        else:
+            # X86_64
+            for line in open('/proc/cpuinfo'):
+                if not line.startswith('flags'):
+                    continue
+
+                flags = line.split()
+                if 'vmx' in flags:
+                    return ArchX86(VMX_EXIT_REASONS)
+                if 'svm' in flags:
+                    return ArchX86(SVM_EXIT_REASONS)
+                return
+
+class ArchX86(Arch):
+    def __init__(self, exit_reasons):
+        self.sc_perf_evt_open = 298
+        self.ioctl_numbers = IOCTL_NUMBERS
+        self.exit_reasons = exit_reasons
+
+class ArchPPC(Arch):
+    def __init__(self):
+        self.sc_perf_evt_open = 319
+        self.ioctl_numbers = IOCTL_NUMBERS
+        self.ioctl_numbers['ENABLE'] = 0x20002400
+        self.ioctl_numbers['DISABLE'] = 0x20002401
 
-ioctl_numbers = {
-    'SET_FILTER' : 0x40082406,
-    'ENABLE'     : 0x00002400,
-    'DISABLE'    : 0x00002401,
-    'RESET'      : 0x00002403,
-}
+        # PPC comes in 32 and 64 bit and some generated ioctl
+        # numbers depend on the wordsize.
+        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
+
+class ArchA64(Arch):
+    def __init__(self):
+        self.sc_perf_evt_open = 241
+        self.ioctl_numbers = IOCTL_NUMBERS
+        self.exit_reasons = AARCH64_EXIT_REASONS
+
+class ArchS390(Arch):
+    def __init__(self):
+        self.sc_perf_evt_open = 331
+        self.ioctl_numbers = IOCTL_NUMBERS
+        self.exit_reasons = None
+
+ARCH = Arch.get_arch()
+
+
+def walkdir(path):
+    """Returns os.walk() data for specified directory.
+
+    As it is only a wrapper it returns the same 3-tuple of (dirpath,
+    dirnames, filenames).
+    """
+    return next(os.walk(path))
+
+
+def parse_int_list(list_string):
+    """Returns an int list from a string of comma separated integers and
+    integer ranges."""
+    integers = []
+    members = list_string.split(',')
 
-def x86_init(flag):
-    globals().update({
-        'sc_perf_evt_open' : 298,
-        'exit_reasons' : x86_exit_reasons[flag],
-    })
-
-def s390_init():
-    globals().update({
-        'sc_perf_evt_open' : 331
-    })
-
-def ppc_init():
-    globals().update({
-        'sc_perf_evt_open' : 319,
-        'ioctl_numbers' : {
-            'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
-            'ENABLE'     : 0x20002400,
-            'DISABLE'    : 0x20002401,
-        }
-    })
-
-def aarch64_init():
-    globals().update({
-        'sc_perf_evt_open' : 241,
-        'exit_reasons' : aarch64_exit_reasons,
-    })
-
-def detect_platform():
-    if os.uname()[4].startswith('ppc'):
-        ppc_init()
-        return
-    elif os.uname()[4].startswith('aarch64'):
-        aarch64_init()
-        return
-
-    for line in file('/proc/cpuinfo').readlines():
-        if line.startswith('flags'):
-            for flag in line.split():
-                if flag in x86_exit_reasons:
-                    x86_init(flag)
-                    return
-        elif line.startswith('vendor_id'):
-            for flag in line.split():
-                if flag == 'IBM/S390':
-                    s390_init()
-                    return
-
-detect_platform()
-
-def invert(d):
-    return dict((x[1], x[0]) for x in d.iteritems())
-
-filters = {}
-filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons))
-if exit_reasons:
-    filters['kvm_exit'] = ('exit_reason', invert(exit_reasons))
-
-import struct, array
-
-libc = ctypes.CDLL('libc.so.6')
+    for member in members:
+        if '-' not in member:
+            integers.append(int(member))
+        else:
+            int_range = member.split('-')
+            integers.extend(range(int(int_range[0]),
+                                  int(int_range[1]) + 1))
+
+    return integers
+
+
+def get_online_cpus():
+    with open('/sys/devices/system/cpu/online') as cpu_list:
+        cpu_string = cpu_list.readline()
+        return parse_int_list(cpu_string)
+
+
+def get_filters():
+    filters = {}
+    filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+    if ARCH.exit_reasons:
+        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
+    return filters
+
+libc = ctypes.CDLL('libc.so.6', use_errno=True)
 syscall = libc.syscall
-get_errno = libc.__errno_location
-get_errno.restype = POINTER(c_int)
 
 class perf_event_attr(ctypes.Structure):
     _fields_ = [('type', ctypes.c_uint32),
@@ -305,262 +331,350 @@ class perf_event_attr(ctypes.Structure):
                 ('bp_addr', ctypes.c_uint64),
                 ('bp_len', ctypes.c_uint64),
                 ]
-def _perf_event_open(attr, pid, cpu, group_fd, flags):
-    return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
-                   ctypes.c_int(cpu), ctypes.c_int(group_fd),
-                   ctypes.c_long(flags))
-
-PERF_TYPE_HARDWARE              = 0
-PERF_TYPE_SOFTWARE              = 1
-PERF_TYPE_TRACEPOINT            = 2
-PERF_TYPE_HW_CACHE              = 3
-PERF_TYPE_RAW                   = 4
-PERF_TYPE_BREAKPOINT            = 5
-
-PERF_SAMPLE_IP                  = 1 << 0
-PERF_SAMPLE_TID                 = 1 << 1
-PERF_SAMPLE_TIME                = 1 << 2
-PERF_SAMPLE_ADDR                = 1 << 3
-PERF_SAMPLE_READ                = 1 << 4
-PERF_SAMPLE_CALLCHAIN           = 1 << 5
-PERF_SAMPLE_ID                  = 1 << 6
-PERF_SAMPLE_CPU                 = 1 << 7
-PERF_SAMPLE_PERIOD              = 1 << 8
-PERF_SAMPLE_STREAM_ID           = 1 << 9
-PERF_SAMPLE_RAW                 = 1 << 10
-
-PERF_FORMAT_TOTAL_TIME_ENABLED  = 1 << 0
-PERF_FORMAT_TOTAL_TIME_RUNNING  = 1 << 1
-PERF_FORMAT_ID                  = 1 << 2
-PERF_FORMAT_GROUP               = 1 << 3
 
-import re
+    def __init__(self):
+        super(self.__class__, self).__init__()
+        self.type = PERF_TYPE_TRACEPOINT
+        self.size = ctypes.sizeof(self)
+        self.read_format = PERF_FORMAT_GROUP
+
+def perf_event_open(attr, pid, cpu, group_fd, flags):
+    return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
+                   ctypes.c_int(pid), ctypes.c_int(cpu),
+                   ctypes.c_int(group_fd), ctypes.c_long(flags))
 
-sys_tracing = '/sys/kernel/debug/tracing'
+PERF_TYPE_TRACEPOINT = 2
+PERF_FORMAT_GROUP = 1 << 3
+
+PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
+PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
 
 class Group(object):
-    def __init__(self, cpu):
+    def __init__(self):
         self.events = []
-        self.group_leader = None
-        self.cpu = cpu
-    def add_event(self, name, event_set, tracepoint, filter = None):
-        self.events.append(Event(group = self,
-                                 name = name, event_set = event_set,
-                                 tracepoint = tracepoint, filter = filter))
-        if len(self.events) == 1:
-            self.file = os.fdopen(self.events[0].fd)
+
+    def add_event(self, event):
+        self.events.append(event)
+
     def read(self):
-        bytes = 8 * (1 + len(self.events))
-        fmt = 'xxxxxxxx' + 'q' * len(self.events)
+        length = 8 * (1 + len(self.events))
+        read_format = 'xxxxxxxx' + 'Q' * len(self.events)
         return dict(zip([event.name for event in self.events],
-                        struct.unpack(fmt, self.file.read(bytes))))
+                        struct.unpack(read_format,
+                                      os.read(self.events[0].fd, length))))
 
 class Event(object):
-    def __init__(self, group, name, event_set, tracepoint, filter = None):
+    def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
+                 trace_set='kvm'):
         self.name = name
-        attr = perf_event_attr()
-        attr.type = PERF_TYPE_TRACEPOINT
-        attr.size = ctypes.sizeof(attr)
-        id_path = os.path.join(sys_tracing, 'events', event_set,
-                               tracepoint, 'id')
-        id = int(file(id_path).read())
-        attr.config = id
-        attr.sample_type = (PERF_SAMPLE_RAW
-                            | PERF_SAMPLE_TIME
-                            | PERF_SAMPLE_CPU)
-        attr.sample_period = 1
-        attr.read_format = PERF_FORMAT_GROUP
+        self.fd = None
+        self.setup_event(group, trace_cpu, trace_point, trace_filter,
+                         trace_set)
+
+    def setup_event_attribute(self, trace_set, trace_point):
+        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
+                               trace_point, 'id')
+
+        event_attr = perf_event_attr()
+        event_attr.config = int(open(id_path).read())
+        return event_attr
+
+    def setup_event(self, group, trace_cpu, trace_point, trace_filter,
+                    trace_set):
+        event_attr = self.setup_event_attribute(trace_set, trace_point)
+
         group_leader = -1
         if group.events:
             group_leader = group.events[0].fd
-        fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
+
+        fd = perf_event_open(event_attr, -1, trace_cpu,
+                             group_leader, 0)
         if fd == -1:
-            err = get_errno()[0]
-            raise Exception('perf_event_open failed, errno = ' + err.__str__())
-        if filter:
-            import fcntl
-            fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
+            err = ctypes.get_errno()
+            raise OSError(err, os.strerror(err),
+                          'while calling sys_perf_event_open().')
+
+        if trace_filter:
+            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
+                        trace_filter)
+
         self.fd = fd
+
     def enable(self):
-        import fcntl
-        fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0)
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
+
     def disable(self):
-        import fcntl
-        fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0)
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
+
     def reset(self):
-        import fcntl
-        fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0)
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
 
 class TracepointProvider(object):
     def __init__(self):
-        path = os.path.join(sys_tracing, 'events', 'kvm')
-        fields = [f
-                  for f in os.listdir(path)
-                  if os.path.isdir(os.path.join(path, f))]
+        self.group_leaders = []
+        self.filters = get_filters()
+        self._fields = self.get_available_fields()
+        self.setup_traces()
+        self.fields = self._fields
+
+    def get_available_fields(self):
+        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
+        fields = walkdir(path)[1]
         extra = []
-        for f in fields:
-            if f in filters:
-                subfield, values = filters[f]
-                for name, number in values.iteritems():
-                    extra.append(f + '(' + name + ')')
+        for field in fields:
+            if field in self.filters:
+                filter_name_, filter_dicts = self.filters[field]
+                for name in filter_dicts:
+                    extra.append(field + '(' + name + ')')
         fields += extra
-        self._setup(fields)
-        self.select(fields)
-    def fields(self):
-        return self._fields
+        return fields
+
+    def setup_traces(self):
+        cpus = get_online_cpus()
+
+        # The constant is needed as a buffer for python libs, std
+        # streams and other files that the script opens.
+        newlim = len(cpus) * len(self._fields) + 50
+        try:
+            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+            if hardlim < newlim:
+                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
+            else:
+                # Raising the soft limit is sufficient.
+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
+
+        except ValueError:
+            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
 
-    def _online_cpus(self):
-        l = []
-        pattern = r'cpu([0-9]+)'
-        basedir = '/sys/devices/system/cpu'
-        for entry in os.listdir(basedir):
-            match = re.match(pattern, entry)
-            if not match:
-                continue
-            path = os.path.join(basedir, entry, 'online')
-            if os.path.exists(path) and open(path).read().strip() != '1':
-                continue
-            l.append(int(match.group(1)))
-        return l
-
-    def _setup(self, _fields):
-        self._fields = _fields
-        cpus = self._online_cpus()
-        import resource
-        nfiles = len(cpus) * 1000
-        resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
-        events = []
-        self.group_leaders = []
         for cpu in cpus:
-            group = Group(cpu)
-            for name in _fields:
+            group = Group()
+            for name in self._fields:
                 tracepoint = name
-                filter = None
-                m = re.match(r'(.*)\((.*)\)', name)
-                if m:
-                    tracepoint, sub = m.groups()
-                    filter = '%s==%d\0' % (filters[tracepoint][0],
-                                           filters[tracepoint][1][sub])
-                event = group.add_event(name, event_set = 'kvm',
-                                        tracepoint = tracepoint,
-                                        filter = filter)
+                tracefilter = None
+                match = re.match(r'(.*)\((.*)\)', name)
+                if match:
+                    tracepoint, sub = match.groups()
+                    tracefilter = ('%s==%d\0' %
+                                   (self.filters[tracepoint][0],
+                                    self.filters[tracepoint][1][sub]))
+
+                group.add_event(Event(name=name,
+                                      group=group,
+                                      trace_cpu=cpu,
+                                      trace_point=tracepoint,
+                                      trace_filter=tracefilter))
             self.group_leaders.append(group)
-    def select(self, fields):
+
+    def available_fields(self):
+        return self.get_available_fields()
+
+    @property
+    def fields(self):
+        return self._fields
+
+    @fields.setter
+    def fields(self, fields):
+        self._fields = fields
         for group in self.group_leaders:
-            for event in group.events:
+            for index, event in enumerate(group.events):
                 if event.name in fields:
                     event.reset()
                     event.enable()
                 else:
-                    event.disable()
+                    # Do not disable the group leader.
+                    # It would disable all of its events.
+                    if index != 0:
+                        event.disable()
+
     def read(self):
-        from collections import defaultdict
         ret = defaultdict(int)
         for group in self.group_leaders:
             for name, val in group.read().iteritems():
-                ret[name] += val
+                if name in self._fields:
+                    ret[name] += val
         return ret
 
-class Stats:
-    def __init__(self, providers, fields = None):
+class DebugfsProvider(object):
+    def __init__(self):
+        self._fields = self.get_available_fields()
+
+    def get_available_fields(self):
+        return walkdir(PATH_DEBUGFS_KVM)[2]
+
+    @property
+    def fields(self):
+        return self._fields
+
+    @fields.setter
+    def fields(self, fields):
+        self._fields = fields
+
+    def read(self):
+        def val(key):
+            return int(file(PATH_DEBUGFS_KVM + '/' + key).read())
+        return dict([(key, val(key)) for key in self._fields])
+
+class Stats(object):
+    def __init__(self, providers, fields=None):
         self.providers = providers
-        self.fields_filter = fields
-        self._update()
-    def _update(self):
+        self._fields_filter = fields
+        self.values = {}
+        self.update_provider_filters()
+
+    def update_provider_filters(self):
         def wanted(key):
-            import re
-            if not self.fields_filter:
+            if not self._fields_filter:
                 return True
-            return re.match(self.fields_filter, key) is not None
-        self.values = dict()
-        for d in providers:
-            provider_fields = [key for key in d.fields() if wanted(key)]
-            for key in provider_fields:
-                self.values[key] = None
-            d.select(provider_fields)
-    def set_fields_filter(self, fields_filter):
-        self.fields_filter = fields_filter
-        self._update()
+            return re.match(self._fields_filter, key) is not None
+
+        # As we reset the counters when updating the fields we can
+        # also clear the cache of old values.
+        self.values = {}
+        for provider in self.providers:
+            provider_fields = [key for key in provider.get_available_fields()
+                               if wanted(key)]
+            provider.fields = provider_fields
+
+    @property
+    def fields_filter(self):
+        return self._fields_filter
+
+    @fields_filter.setter
+    def fields_filter(self, fields_filter):
+        self._fields_filter = fields_filter
+        self.update_provider_filters()
+
     def get(self):
-        for d in providers:
-            new = d.read()
-            for key in d.fields():
+        for provider in self.providers:
+            new = provider.read()
+            for key in provider.fields:
                 oldval = self.values.get(key, (0, 0))
-                newval = new[key]
+                newval = new.get(key, 0)
                 newdelta = None
                 if oldval is not None:
                     newdelta = newval - oldval[0]
                 self.values[key] = (newval, newdelta)
         return self.values
 
-if not os.access('/sys/kernel/debug', os.F_OK):
-    print 'Please enable CONFIG_DEBUG_FS in your kernel'
-    sys.exit(1)
-if not os.access('/sys/kernel/debug/kvm', os.F_OK):
-    print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
-    print "and ensure the kvm modules are loaded"
-    sys.exit(1)
-
-label_width = 40
-number_width = 10
-
-def tui(screen, stats):
-    curses.use_default_colors()
-    curses.noecho()
-    drilldown = False
-    fields_filter = stats.fields_filter
-    def update_drilldown():
-        if not fields_filter:
-            if drilldown:
-                stats.set_fields_filter(None)
-            else:
-                stats.set_fields_filter(r'^[^\(]*$')
-    update_drilldown()
-    def refresh(sleeptime):
-        screen.erase()
-        screen.addstr(0, 0, 'kvm statistics')
-        screen.addstr(2, 1, 'Event')
-        screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total')
-        screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current')
+LABEL_WIDTH = 40
+NUMBER_WIDTH = 10
+
+class Tui(object):
+    def __init__(self, stats):
+        self.stats = stats
+        self.screen = None
+        self.drilldown = False
+        self.update_drilldown()
+
+    def __enter__(self):
+        """Initialises curses for later use.  Based on curses.wrapper
+           implementation from the Python standard library."""
+        self.screen = curses.initscr()
+        curses.noecho()
+        curses.cbreak()
+
+        # The try/catch works around a minor bit of
+        # over-conscientiousness in the curses module, the error
+        # return from C start_color() is ignorable.
+        try:
+            curses.start_color()
+        except:
+            pass
+
+        curses.use_default_colors()
+        return self
+
+    def __exit__(self, *exception):
+        """Resets the terminal to its normal state.  Based on curses.wrappre
+           implementation from the Python standard library."""
+        if self.screen:
+            self.screen.keypad(0)
+            curses.echo()
+            curses.nocbreak()
+            curses.endwin()
+
+    def update_drilldown(self):
+        if not self.stats.fields_filter:
+            self.stats.fields_filter = r'^[^\(]*$'
+
+        elif self.stats.fields_filter == r'^[^\(]*$':
+            self.stats.fields_filter = None
+
+    def refresh(self, sleeptime):
+        self.screen.erase()
+        self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+        self.screen.addstr(2, 1, 'Event')
+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
+                           len('Total'), 'Total')
+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
+                           len('Current'), 'Current')
         row = 3
-        s = stats.get()
+        stats = self.stats.get()
         def sortkey(x):
-            if s[x][1]:
-                return (-s[x][1], -s[x][0])
+            if stats[x][1]:
+                return (-stats[x][1], -stats[x][0])
             else:
-                return (0, -s[x][0])
-        for key in sorted(s.keys(), key = sortkey):
-            if row >= screen.getmaxyx()[0]:
+                return (0, -stats[x][0])
+        for key in sorted(stats.keys(), key=sortkey):
+
+            if row >= self.screen.getmaxyx()[0]:
                 break
-            values = s[key]
+            values = stats[key]
             if not values[0] and not values[1]:
                 break
             col = 1
-            screen.addstr(row, col, key)
-            col += label_width
-            screen.addstr(row, col, '%10d' % (values[0],))
-            col += number_width
+            self.screen.addstr(row, col, key)
+            col += LABEL_WIDTH
+            self.screen.addstr(row, col, '%10d' % (values[0],))
+            col += NUMBER_WIDTH
             if values[1] is not None:
-                screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
+                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
             row += 1
-        screen.refresh()
+        self.screen.refresh()
+
+    def show_filter_selection(self):
+        while True:
+            self.screen.erase()
+            self.screen.addstr(0, 0,
+                               "Show statistics for events matching a regex.",
+                               curses.A_BOLD)
+            self.screen.addstr(2, 0,
+                               "Current regex: {0}"
+                               .format(self.stats.fields_filter))
+            self.screen.addstr(3, 0, "New regex: ")
+            curses.echo()
+            regex = self.screen.getstr()
+            curses.noecho()
+            if len(regex) == 0:
+                return
+            try:
+                re.compile(regex)
+                self.stats.fields_filter = regex
+                return
+            except re.error:
+                continue
 
-    sleeptime = 0.25
-    while True:
-        refresh(sleeptime)
-        curses.halfdelay(int(sleeptime * 10))
-        sleeptime = 3
-        try:
-            c = screen.getkey()
-            if c == 'x':
-                drilldown = not drilldown
-                update_drilldown()
-            if c == 'q':
+    def show_stats(self):
+        sleeptime = 0.25
+        while True:
+            self.refresh(sleeptime)
+            curses.halfdelay(int(sleeptime * 10))
+            sleeptime = 3
+            try:
+                char = self.screen.getkey()
+                if char == 'x':
+                    self.drilldown = not self.drilldown
+                    self.update_drilldown()
+                if char == 'q':
+                    break
+                if char == 'f':
+                    self.show_filter_selection()
+            except KeyboardInterrupt:
                 break
-        except KeyboardInterrupt:
-            break
-        except curses.error:
-            continue
+            except curses.error:
+                continue
 
 def batch(stats):
     s = stats.get()
@@ -568,13 +682,13 @@ def batch(stats):
     s = stats.get()
     for key in sorted(s.keys()):
         values = s[key]
-        print '%-22s%10d%10d' % (key, values[0], values[1])
+        print '%-42s%10d%10d' % (key, values[0], values[1])
 
 def log(stats):
     keys = sorted(stats.get().iterkeys())
     def banner():
         for k in keys:
-            print '%10s' % k[0:9],
+            print '%s' % k,
         print
     def statline():
         s = stats.get()
@@ -590,57 +704,110 @@ def log(stats):
         statline()
         line += 1
 
-options = optparse.OptionParser()
-options.add_option('-1', '--once', '--batch',
-                   action = 'store_true',
-                   default = False,
-                   dest = 'once',
-                   help = 'run in batch mode for one second',
-                   )
-options.add_option('-l', '--log',
-                   action = 'store_true',
-                   default = False,
-                   dest = 'log',
-                   help = 'run in logging mode (like vmstat)',
-                   )
-options.add_option('-t', '--tracepoints',
-                   action = 'store_true',
-                   default = False,
-                   dest = 'tracepoints',
-                   help = 'retrieve statistics from tracepoints',
-                   )
-options.add_option('-d', '--debugfs',
-                   action = 'store_true',
-                   default = False,
-                   dest = 'debugfs',
-                   help = 'retrieve statistics from debugfs',
-                   )
-options.add_option('-f', '--fields',
-                   action = 'store',
-                   default = None,
-                   dest = 'fields',
-                   help = 'fields to display (regex)',
-                   )
-(options, args) = options.parse_args(sys.argv)
-
-providers = []
-if options.tracepoints:
-    providers.append(TracepointProvider())
-if options.debugfs:
-    providers.append(DebugfsProvider())
-
-if len(providers) == 0:
-    try:
-        providers = [TracepointProvider()]
-    except:
-        providers = [DebugfsProvider()]
-
-stats = Stats(providers, fields = options.fields)
-
-if options.log:
-    log(stats)
-elif not options.once:
-    import curses.wrapper
-    curses.wrapper(tui, stats)
-else:
-    batch(stats)
+def get_options():
+    description_text = """
+This script displays various statistics about VMs running under KVM.
+The statistics are gathered from the KVM debugfs entries and / or the
+currently available perf traces.
+
+The monitoring takes additional cpu cycles and might affect the VM's
+performance.
+
+Requirements:
+- Access to:
+    /sys/kernel/debug/kvm
+    /sys/kernel/debug/trace/events/*
+    /proc/pid/task
+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
+  CAP_SYS_ADMIN and perf events are used.
+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
+  the large number of files that are possibly opened.
+"""
+
+    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
+        def format_description(self, description):
+            if description:
+                return description + "\n"
+            else:
+                return ""
+
+    optparser = optparse.OptionParser(description=description_text,
+                                      formatter=PlainHelpFormatter())
+    optparser.add_option('-1', '--once', '--batch',
+                         action='store_true',
+                         default=False,
+                         dest='once',
+                         help='run in batch mode for one second',
+                         )
+    optparser.add_option('-l', '--log',
+                         action='store_true',
+                         default=False,
+                         dest='log',
+                         help='run in logging mode (like vmstat)',
+                         )
+    optparser.add_option('-t', '--tracepoints',
+                         action='store_true',
+                         default=False,
+                         dest='tracepoints',
+                         help='retrieve statistics from tracepoints',
+                         )
+    optparser.add_option('-d', '--debugfs',
+                         action='store_true',
+                         default=False,
+                         dest='debugfs',
+                         help='retrieve statistics from debugfs',
+                         )
+    optparser.add_option('-f', '--fields',
+                         action='store',
+                         default=None,
+                         dest='fields',
+                         help='fields to display (regex)',
+                         )
+    (options, _) = optparser.parse_args(sys.argv)
+    return options
+
+def get_providers(options):
+    providers = []
+
+    if options.tracepoints:
+        providers.append(TracepointProvider())
+    if options.debugfs:
+        providers.append(DebugfsProvider())
+    if len(providers) == 0:
+        providers.append(TracepointProvider())
+
+    return providers
+
+def check_access():
+    if not os.path.exists('/sys/kernel/debug'):
+        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
+        sys.exit(1)
+
+    if not os.path.exists(PATH_DEBUGFS_KVM):
+        sys.stderr.write("Please make sure, that debugfs is mounted and "
+                         "readable by the current user:\n"
+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
+                         "Also ensure, that the kvm modules are loaded.\n")
+        sys.exit(1)
+
+    if not os.path.exists(PATH_DEBUGFS_TRACING):
+        sys.stderr.write("Please make {0} readable by the current user.\n"
+                         .format(PATH_DEBUGFS_TRACING))
+        sys.exit(1)
+
+def main():
+    check_access()
+    options = get_options()
+    providers = get_providers(options)
+    stats = Stats(providers, fields=options.fields)
+
+    if options.log:
+        log(stats)
+    elif not options.once:
+        with Tui(stats) as tui:
+            tui.show_stats()
+    else:
+        batch(stats)
+
+if __name__ == "__main__":
+    main()
index b7352f1a35df2b155952c15b1ce03859d16abb3c..650e654ec228ea54ff177370c7d47665e1950a72 100644 (file)
@@ -558,7 +558,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y)
 tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
 tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
 tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)