]> git.proxmox.com Git - qemu.git/commitdiff
Add KVM support to QEMU
authoraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Wed, 5 Nov 2008 16:04:33 +0000 (16:04 +0000)
committeraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Wed, 5 Nov 2008 16:04:33 +0000 (16:04 +0000)
This patch adds very basic KVM support.  KVM is a kernel module for Linux that
allows userspace programs to make use of hardware virtualization support.  It
currently supports x86 hardware virtualization using Intel VT-x or AMD-V.  It
also supports IA64 VT-i, PPC 440, and S390.

This patch only implements the bare minimum support to get a guest booting.  It
has very little impact on the rest of QEMU and attempts to integrate nicely with
the rest of QEMU.

Even though this implementation is basic, it is significantly faster than TCG.
Booting and shutting down a Linux guest:

w/TCG:  1:32.36 elapsed  84% CPU

w/KVM:  0:31.14 elapsed  59% CPU

Right now, KVM is disabled by default and must be explicitly enabled with
 -enable-kvm.  We can enable it by default later when we have had better
testing.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5627 c046a42c-6fe2-441c-8c8c-71466251a162

Makefile.target
configure
cpu-defs.h
cpu-exec.c
exec.c
hw/acpi.c
monitor.c
target-i386/cpu.h
target-i386/helper.c
vl.c

index 62168c246fccef770560ecb8d25cb3aec59da0ff..00267e7920a98f2a63e65a6434d8fc96fc019908 100644 (file)
@@ -183,6 +183,9 @@ CFLAGS+=-I/opt/SUNWspro/prod/include/cc
 endif
 endif
 
+kvm.o: CFLAGS+=$(KVM_CFLAGS)
+kvm-all.o: CFLAGS+=$(KVM_CFLAGS)
+
 all: $(PROGS)
 
 #########################################################
@@ -581,6 +584,9 @@ ifndef CONFIG_USER_ONLY
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
 OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o qemu-char.o
 OBJS+=net.o
+ifdef CONFIG_KVM
+OBJS+=kvm.o kvm-all.o
+endif
 ifdef CONFIG_WIN32
 OBJS+=block-raw-win32.o
 else
index 8b0ad89b12e09cf0ae71243ebba2cc847123e100..53167d5df7b67b5b68879ed93d4c057aecf9d220 100755 (executable)
--- a/configure
+++ b/configure
@@ -115,6 +115,7 @@ aio="yes"
 nptl="yes"
 mixemu="no"
 bluez="yes"
+kvm="yes"
 
 # OS specific
 targetos=`uname -s`
@@ -303,6 +304,8 @@ for opt do
   ;;
   --disable-bluez) bluez="no"
   ;;
+  --disable-kvm) kvm="no"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --enable-cocoa)
@@ -448,6 +451,7 @@ echo "  --disable-brlapi         disable BrlAPI"
 echo "  --disable-vnc-tls        disable TLS encryption for VNC server"
 echo "  --disable-curses         disable curses output"
 echo "  --disable-bluez          disable bluez stack connectivity"
+echo "  --disable-kvm            disable KVM acceleration support"
 echo "  --disable-nptl           disable usermode NPTL support"
 echo "  --enable-system          enable all system emulation targets"
 echo "  --disable-system         disable all system emulation targets"
@@ -950,6 +954,30 @@ EOF
   fi
 fi
 
+##########################################
+# kvm probe
+if test "$kvm" = "yes" ; then
+    cat > $TMPC <<EOF
+#include <linux/kvm.h>
+#if !defined(KVM_API_VERSION) || \
+    KVM_API_VERSION < 12 || \
+    KVM_API_VERSION > 12 || \
+    !defined(KVM_CAP_USER_MEMORY) || \
+    !defined(KVM_CAP_SET_TSS_ADDR)
+#error Invalid KVM version
+#endif
+int main(void) { return 0; }
+EOF
+  # FIXME make this configurable
+  kvm_cflags=-I/lib/modules/`uname -r`/build/include
+  if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $kvm_cflags $TMPC \
+      2>/dev/null ; then
+    :
+  else
+    kvm="no"
+  fi
+fi
+
 ##########################################
 # AIO probe
 if test "$aio" = "yes" ; then
@@ -1036,6 +1064,7 @@ echo "uname -r          $uname_release"
 echo "NPTL support      $nptl"
 echo "vde support       $vde"
 echo "AIO support       $aio"
+echo "KVM support       $kvm"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -1411,6 +1440,15 @@ interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
 echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
 gdb_xml_files=""
 
+# FIXME allow i386 to build on x86_64 and vice versa
+if test "$kvm" = "yes" -a "$target_cpu" != "$cpu" ; then
+  kvm="no"
+fi
+# Disable KVM for linux-user
+if test "$kvm" = "yes" -a "$target_softmmu" = "no" ; then
+  kvm="no"
+fi
+
 case "$target_cpu" in
   i386)
     echo "TARGET_ARCH=i386" >> $config_mak
@@ -1420,6 +1458,11 @@ case "$target_cpu" in
     then
       echo "#define USE_KQEMU 1" >> $config_h
     fi
+    if test "$kvm" = "yes" ; then  # emit KVM make and header settings
+      echo "CONFIG_KVM=yes" >> $config_mak
+      echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
+      echo "#define CONFIG_KVM 1" >> $config_h  # valued define, like USE_KQEMU
+    fi
     gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print $3 }' | cut -f2 -d.`
     if test -n "$gcc3minver" && test $gcc3minver -gt 3
     then
@@ -1437,6 +1480,11 @@ case "$target_cpu" in
     then
       echo "#define USE_KQEMU 1" >> $config_h
     fi
+    if test "$kvm" = "yes" ; then
+      echo "CONFIG_KVM=yes" >> $config_mak
+      echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
+      echo "#define CONFIG_KVM 1" >> $config_h
+    fi
   ;;
   alpha)
     echo "TARGET_ARCH=alpha" >> $config_mak
index 5dcac74c3400a9b9283aa4426a0a667df442e7ea..46d4487811a85b2da9b2b8a4e80ae604e7f8ab49 100644 (file)
@@ -142,6 +142,9 @@ typedef struct icount_decr_u16 {
 } icount_decr_u16;
 #endif
 
+struct kvm_run;
+struct KVMState;
+
 #define CPU_TEMP_BUF_NLONGS 128
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
@@ -199,6 +202,9 @@ typedef struct icount_decr_u16 {
     /* user data */                                                     \
     void *opaque;                                                       \
                                                                         \
-    const char *cpu_model_str;
+    const char *cpu_model_str;                                          \
+    struct KVMState *kvm_state;                                         \
+    struct kvm_run *kvm_run;                                            \
+    int kvm_fd;
 
 #endif
index 62886a4f187103ca878e831ab5bf97f03b75712e..b3662400ffe712ee8521f5c885a7c4756a6145d4 100644 (file)
@@ -22,6 +22,7 @@
 #include "exec.h"
 #include "disas.h"
 #include "tcg.h"
+#include "kvm.h"
 
 #if !defined(CONFIG_SOFTMMU)
 #undef EAX
@@ -371,6 +372,19 @@ int cpu_exec(CPUState *env1)
             }
 #endif
 
+            if (kvm_enabled()) {
+                int ret;
+                ret = kvm_cpu_exec(env);
+                if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
+                    env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
+                    env->exception_index = EXCP_INTERRUPT;
+                    cpu_loop_exit();
+                } else if (env->halted) {
+                    cpu_loop_exit();
+                } else
+                    longjmp(env->jmp_env, 1);
+            }
+
             next_tb = 0; /* force lookup of first TB */
             for(;;) {
                 interrupt_request = env->interrupt_request;
diff --git a/exec.c b/exec.c
index 697a19eb255c0b99930533fe557effe8ab384a06..ef1072b6dd63c69a7ad568759c1f401c3986d464 100644 (file)
--- a/exec.c
+++ b/exec.c
@@ -39,6 +39,7 @@
 #include "tcg.h"
 #include "hw/hw.h"
 #include "osdep.h"
+#include "kvm.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #endif
@@ -2212,6 +2213,9 @@ void cpu_register_physical_memory(target_phys_addr_t start_addr,
         kqemu_set_phys_mem(start_addr, size, phys_offset);
     }
 #endif
+    if (kvm_enabled())
+        kvm_set_phys_mem(start_addr, size, phys_offset);
+
     size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
     end_addr = start_addr + (target_phys_addr_t)size;
     for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
index 45963d37e88c10f443e416010f5be537081f48fe..66a5faa0fdbcd0706c629bc52ad88e7a10e679d1 100644 (file)
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -23,6 +23,7 @@
 #include "sysemu.h"
 #include "i2c.h"
 #include "smbus.h"
+#include "kvm.h"
 
 //#define DEBUG
 
@@ -501,6 +502,12 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
 
     register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
 
+    if (kvm_enabled()) {
+        /* Mark SMM as already inited to prevent SMM from running.  KVM does not
+         * support SMM mode. */
+        pci_conf[0x5B] = 0x02;
+    }
+
     /* XXX: which specification is used ? The i82731AB has different
        mappings */
     pci_conf[0x5f] = (parallel_hds[0] != NULL ? 0x80 : 0) | 0x10;
index 61bd33543ac5b499b158dfc7a47454bfb1528bcf..8fff3aa75203761758ae61e98ea6cdff550ca359 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -37,6 +37,7 @@
 #include <dirent.h>
 #include "qemu-timer.h"
 #include "migration.h"
+#include "kvm.h"
 
 //#define DEBUG
 //#define DEBUG_COMPLETION
@@ -1263,6 +1264,19 @@ static void do_info_kqemu(void)
 #endif
 }
 
+/* Handler behind the "kvm" entry of the info command table: prints whether
+ * KVM support was compiled in and, when it was, whether it is currently
+ * enabled. */
+static void do_info_kvm(void)
+{
+#ifndef CONFIG_KVM
+    term_printf("kvm support: not compiled\n");
+#else
+    term_printf("kvm support: ");
+    term_printf(kvm_enabled() ? "enabled\n" : "disabled\n");
+#endif
+}
+
 #ifdef CONFIG_PROFILER
 
 int64_t kqemu_time;
@@ -1497,6 +1511,8 @@ static const term_cmd_t info_cmds[] = {
       "", "show dynamic compiler info", },
     { "kqemu", "", do_info_kqemu,
       "", "show kqemu information", },
+    { "kvm", "", do_info_kvm,
+      "", "show kvm information", },
     { "usb", "", usb_info,
       "", "show guest USB devices", },
     { "usbhost", "", usb_host_info,
index 263a477765ddb8f6f44b1a8f3f15114d7827da9a..167bae268adcce13fa4845c5dd053424ed97a047 100644 (file)
@@ -587,6 +587,8 @@ typedef struct CPUX86State {
     target_ulong kernelgsbase;
 #endif
 
+    uint64_t tsc;
+
     uint64_t pat;
 
     /* exception/interrupt handling */
@@ -617,6 +619,10 @@ typedef struct CPUX86State {
     int kqemu_enabled;
     int last_io_time;
 #endif
+
+    /* For KVM */
+    uint64_t interrupt_bitmap[256 / 64];
+
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
index bcf5e7f70558a74aa1bd81b753bed35203b2279a..4b8c5037983664e758bb13108a152c01b5a2aa1d 100644 (file)
@@ -29,6 +29,7 @@
 #include "exec-all.h"
 #include "svm.h"
 #include "qemu-common.h"
+#include "kvm.h"
 
 //#define DEBUG_MMU
 
@@ -115,6 +116,8 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
 #ifdef USE_KQEMU
     kqemu_init(env);
 #endif
+    if (kvm_enabled())
+        kvm_init_vcpu(env);
     return env;
 }
 
@@ -1288,6 +1291,40 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
 }
 #endif /* !CONFIG_USER_ONLY */
 
+#if defined(CONFIG_KVM) /* host_cpuid() is only built when KVM is configured */
+static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
+                       uint32_t *ecx, uint32_t *edx)
+{ /* Execute CPUID leaf 'function' on the host; any output pointer may be NULL */
+    uint32_t vec[4]; /* raw results, in order: eax, ebx, ecx, edx */
+
+#ifdef __x86_64__
+    asm volatile("cpuid"
+                : "=a"(vec[0]), "=b"(vec[1]),
+                  "=c"(vec[2]), "=d"(vec[3])
+                : "0"(function) : "cc"); /* "0": leaf number is passed in eax */
+#else
+    asm volatile("pusha \n\t" /* save all GPRs; the popa below restores them */
+                "cpuid \n\t"
+                "mov %%eax, 0(%1) \n\t" /* %1 is vec, held in esi ("S") */
+                "mov %%ebx, 4(%1) \n\t"
+                "mov %%ecx, 8(%1) \n\t"
+                "mov %%edx, 12(%1) \n\t"
+                "popa"
+                : : "a"(function), "S"(vec)
+                : "memory", "cc"); /* "memory": vec is stored to with no output operand */
+#endif
+
+    if (eax) /* copy out only the registers the caller asked for */
+       *eax = vec[0];
+    if (ebx)
+       *ebx = vec[1];
+    if (ecx)
+       *ecx = vec[2];
+    if (edx)
+       *edx = vec[3];
+}
+#endif
+
 void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
                    uint32_t *eax, uint32_t *ebx,
                    uint32_t *ecx, uint32_t *edx)
@@ -1307,12 +1344,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
         *ebx = env->cpuid_vendor1;
         *edx = env->cpuid_vendor2;
         *ecx = env->cpuid_vendor3;
+
+        /* sysenter isn't supported in compatibility mode on AMD, and syscall
+         * isn't supported in compatibility mode on Intel, so advertise the
+         * actual cpu, and say goodbye to migration between different vendors
+         * if you use compatibility mode. */
+        if (kvm_enabled())
+            host_cpuid(0, NULL, ebx, ecx, edx);
         break;
     case 1:
         *eax = env->cpuid_version;
         *ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
         *ecx = env->cpuid_ext_features;
         *edx = env->cpuid_features;
+
+        /* "Hypervisor present" bit required for Microsoft SVVP */
+        if (kvm_enabled())
+            *ecx |= (1 << 31);
         break;
     case 2:
         /* cache info: needed for Pentium Pro compatibility */
@@ -1390,6 +1438,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
         *ebx = 0;
         *ecx = env->cpuid_ext3_features;
         *edx = env->cpuid_ext2_features;
+
+        if (kvm_enabled()) {
+            uint32_t h_edx;   /* host's edx for leaf 0x80000001 */
+
+            host_cpuid(0x80000001, NULL, NULL, NULL, &h_edx);
+
+            /* disable CPU features that the host does not support */
+
+            /* long mode (edx bit 29) */
+            if ((h_edx & 0x20000000) == 0 /* || !lm_capable_kernel */)
+                *edx &= ~0x20000000;
+            /* syscall (edx bit 11) */
+            if ((h_edx & 0x00000800) == 0)
+                *edx &= ~0x00000800;
+            /* nx (edx bit 20) */
+            if ((h_edx & 0x00100000) == 0)
+                *edx &= ~0x00100000;
+
+            /* disable CPU features that KVM cannot support */
+
+            /* svm (ecx bit 2) */
+            *ecx &= ~4UL;
+            /* 3dnow: clear bits 31:30 only; '=' would overwrite all of edx */
+            *edx &= ~0xc0000000;
+        }
         break;
     case 0x80000002:
     case 0x80000003:
diff --git a/vl.c b/vl.c
index ee93c73087641d61525b763eade5de9a931f2b24..ea9e6529e9bb49e3308865b4559f0206f965e94f 100644 (file)
--- a/vl.c
+++ b/vl.c
@@ -39,6 +39,7 @@
 #include "block.h"
 #include "audio/audio.h"
 #include "migration.h"
+#include "kvm.h"
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -4782,6 +4783,9 @@ static void help(int exitcode)
            "-kernel-kqemu   enable KQEMU full virtualization (default is user mode only)\n"
            "-no-kqemu       disable KQEMU kernel module usage\n"
 #endif
+#ifdef CONFIG_KVM
+           "-enable-kvm     enable KVM full virtualization support\n"
+#endif
 #ifdef TARGET_I386
            "-no-acpi        disable ACPI\n"
 #endif
@@ -4887,6 +4891,7 @@ enum {
     QEMU_OPTION_pidfile,
     QEMU_OPTION_no_kqemu,
     QEMU_OPTION_kernel_kqemu,
+    QEMU_OPTION_enable_kvm,
     QEMU_OPTION_win2k_hack,
     QEMU_OPTION_usb,
     QEMU_OPTION_usbdevice,
@@ -4973,6 +4978,9 @@ static const QEMUOption qemu_options[] = {
     { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
     { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
 #endif
+#ifdef CONFIG_KVM
+    { "enable-kvm", 0, QEMU_OPTION_enable_kvm },
+#endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
     { "g", 1, QEMU_OPTION_g },
 #endif
@@ -5793,6 +5801,14 @@ int main(int argc, char **argv)
             case QEMU_OPTION_kernel_kqemu:
                 kqemu_allowed = 2;
                 break;
+#endif
+#ifdef CONFIG_KVM
+            case QEMU_OPTION_enable_kvm:
+                kvm_allowed = 1;
+#ifdef USE_KQEMU
+                kqemu_allowed = 0;
+#endif
+                break;
 #endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
@@ -5928,6 +5944,14 @@ int main(int argc, char **argv)
         }
     }
 
+#if defined(CONFIG_KVM) && defined(USE_KQEMU)
+    if (kvm_allowed && kqemu_allowed) {
+        fprintf(stderr,
+                "You can not enable both KVM and kqemu at the same time\n");
+        exit(1);
+    }
+#endif
+
     machine->max_cpus = machine->max_cpus ?: 1; /* Default to UP */
     if (smp_cpus > machine->max_cpus) {
         fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus "
@@ -6229,6 +6253,16 @@ int main(int argc, char **argv)
         }
     }
 
+    if (kvm_enabled()) {
+        int ret;
+
+        ret = kvm_init(smp_cpus);
+        if (ret < 0) {
+            fprintf(stderr, "failed to initialize KVM\n");
+            exit(1);
+        }
+    }
+
     machine->init(ram_size, vga_ram_size, boot_devices, ds,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);