Merge remote-tracking branch 'upstream' into next

author Avi Kivity <avi@redhat.com>

Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)

committer Avi Kivity <avi@redhat.com>

Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)
author Avi Kivity <avi@redhat.com>
Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)
committer Avi Kivity <avi@redhat.com>
Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c

index bd77cb507c1c7401124dbd3dc86fe663c17e6737..eac65380bd201a78caf63f971240193235d93b23 100644 (file)
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
         return 0;
  }
  
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+       if (!irqchip_in_kernel(kvm))
+               return -ENXIO;
+
+       irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                       irq_event->irq, irq_event->level);
+       return 0;
+}
+
  long kvm_arch_vm_ioctl(struct file *filp,
                 unsigned int ioctl, unsigned long arg)
  {
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                         goto out;
                 }
                 break;
-       case KVM_IRQ_LINE_STATUS:
-       case KVM_IRQ_LINE: {
-               struct kvm_irq_level irq_event;
-
-               r = -EFAULT;
-               if (copy_from_user(&irq_event, argp, sizeof irq_event))
-                       goto out;
-               r = -ENXIO;
-               if (irqchip_in_kernel(kvm)) {
-                       __s32 status;
-                       status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                                   irq_event.irq, irq_event.level);
-                       if (ioctl == KVM_IRQ_LINE_STATUS) {
-                               r = -EFAULT;
-                               irq_event.status = status;
-                               if (copy_to_user(argp, &irq_event,
-                                                       sizeof irq_event))
-                                       goto out;
-                       }
-                       r = 0;
-               }
-               break;
-               }
         case KVM_GET_IRQCHIP: {
                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
                 struct kvm_irqchip chip;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h

index 50ea12fd7bf5eeab23e6fc1bccafdf4dbe2ad89b..572ad0141268e54e449157b6e0ee0c64bb94d623 100644 (file)
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
  
  struct kvm;
  extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+                              unsigned long start, unsigned long end);
  extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
  extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
  extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c

index d03eb6f7b0584e368bdd96e0f1969af58e885371..3c635c0616b0bbe3b4dd0909e47f883eaffa4eac 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         goto out_put;
  }
  
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-                                        unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+                               unsigned long start,
+                               unsigned long end,
+                               int (*handler)(struct kvm *kvm,
+                                              unsigned long *rmapp,
+                                              unsigned long gfn))
  {
         int ret;
         int retval = 0;
@@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
  
         slots = kvm_memslots(kvm);
         kvm_for_each_memslot(memslot, slots) {
-               unsigned long start = memslot->userspace_addr;
-               unsigned long end;
+               unsigned long hva_start, hva_end;
+               gfn_t gfn, gfn_end;
  
-               end = start + (memslot->npages << PAGE_SHIFT);
-               if (hva >= start && hva < end) {
-                       gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+               hva_start = max(start, memslot->userspace_addr);
+               hva_end = min(end, memslot->userspace_addr +
+                                       (memslot->npages << PAGE_SHIFT));
+               if (hva_start >= hva_end)
+                       continue;
+               /*
+                * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                * {gfn, gfn+1, ..., gfn_end-1}.
+                */
+               gfn = hva_to_gfn_memslot(hva_start, memslot);
+               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+               for (; gfn < gfn_end; ++gfn) {
+                       gfn_t gfn_offset = gfn - memslot->base_gfn;
  
-                       ret = handler(kvm, &memslot->rmap[gfn_offset],
-                                     memslot->base_gfn + gfn_offset);
+                       ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
                         retval |= ret;
                 }
         }
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
         return retval;
  }
  
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                        unsigned long gfn))
+{
+       return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
  static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                            unsigned long gfn)
  {
@@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
         return 0;
  }
  
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+       if (kvm->arch.using_mmu_notifiers)
+               kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+       return 0;
+}
+
  static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                          unsigned long gfn)
  {
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c

index c510fc961302c2d1ae1284cc3d1aab1139002fbd..c8f6c58267426309ff979a7ed2a0d473006adda1 100644 (file)
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
  
         if (likely(!pfnmap)) {
                 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
-               pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+               pfn = gfn_to_pfn_memslot(slot, gfn);
                 if (is_error_pfn(pfn)) {
                         printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
                                         (long)gfn);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h

index 11e4e3236937e106aba159b83e7cc6ae6167c7b0..eac4fb5fb826b1edb7ff38697a38f3a8c94b5443 100644 (file)
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -140,6 +140,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
  extern unsigned long thread_saved_pc(struct task_struct *t);
  
  extern void show_code(struct pt_regs *regs);
+extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]);
  
  unsigned long get_wchan(struct task_struct *p);
  #define task_pt_regs(tsk) ((struct pt_regs *) \
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c

index 619c5d3507264ca1f7417563a9152b55270374a2..ffb622b16ab5251f2b718ad0be832d03e02b4a4e 100644 (file)
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1468,6 +1468,33 @@ static struct insn *find_insn(unsigned char *code)
         return NULL;
  }
  
+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer of at least 8 bytes to fill with the mnemonic
+ *
+ * Decode the instruction at @instruction and store the corresponding
+ * NUL-terminated mnemonic into @buf; @buf is left unchanged on failure.
+ * The size is spelled out as 8 because @buf decays to a pointer, so
+ * sizeof(buf) would yield the pointer size, not the array size.
+ * Returns: %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char buf[8])
+{
+       struct insn *insn;
+
+       insn = find_insn(instruction);
+       if (!insn)
+               return -ENOENT;
+       if (insn->name[0] == '\0')
+               snprintf(buf, 8, "%s",
+                        long_insn_name[(int) insn->name[1]]);
+       else
+               snprintf(buf, 8, "%.5s", insn->name);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+
  static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
  {
         struct insn *insn;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig

index 78eb9847008f2cc8eff307a89e330d42b3dcd69a..a6e2677724e169238a8dfa0a6b97200a764734fb 100644 (file)
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
         depends on HAVE_KVM && EXPERIMENTAL
         select PREEMPT_NOTIFIERS
         select ANON_INODES
+       select HAVE_KVM_CPU_RELAX_INTERCEPT
         ---help---
           Support hosting paravirtualized guest machines using the SIE
           virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c

index c88bb7793390d7a1e26d0982529b32d4747ca67f..a390687feb1359d6b579024d51787e4eeff0207e 100644 (file)
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,8 @@
  #include <linux/kvm.h>
  #include <linux/kvm_host.h>
  #include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
  
  static int diag_release_pages(struct kvm_vcpu *vcpu)
  {
@@ -98,6 +100,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
         vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
         VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
           vcpu->run->s390_reset_flags);
+       trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
         return -EREMOTE;
  }
  
@@ -105,6 +108,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
  {
         int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
  
+       trace_kvm_s390_handle_diag(vcpu, code);
         switch (code) {
         case 0x10:
                 return diag_release_pages(vcpu);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c

index adae539f12e2fbaeb49f41b462f95bc4354ce3f9..22798ec33fd16bd58e5a9726a6f5eec41d798d6a 100644 (file)
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -19,6 +19,8 @@
  
  #include "kvm-s390.h"
  #include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
  
  static int handle_lctlg(struct kvm_vcpu *vcpu)
  {
@@ -45,6 +47,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
  
         VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
                    disp2);
+       trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
  
         do {
                 rc = get_guest_u64(vcpu, useraddr,
@@ -82,6 +85,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
  
         VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
                    disp2);
+       trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
  
         reg = reg1;
         do {
@@ -135,6 +139,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
         vcpu->stat.exit_stop_request++;
         spin_lock_bh(&vcpu->arch.local_int.lock);
  
+       trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
+
         if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
                 vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
                 rc = SIE_INTERCEPT_RERUNVCPU;
@@ -171,6 +177,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
         int rc;
  
         vcpu->stat.exit_validity++;
+       trace_kvm_s390_intercept_validity(vcpu, viwhy);
         if (viwhy == 0x37) {
                 vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
                                     vcpu->arch.gmap);
@@ -213,6 +220,9 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
         intercept_handler_t handler;
  
         vcpu->stat.exit_instruction++;
+       trace_kvm_s390_intercept_instruction(vcpu,
+                                            vcpu->arch.sie_block->ipa,
+                                            vcpu->arch.sie_block->ipb);
         handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
         if (handler)
                 return handler(vcpu);
@@ -222,6 +232,7 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
  static int handle_prog(struct kvm_vcpu *vcpu)
  {
         vcpu->stat.exit_program_interruption++;
+       trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
         return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
  }
  
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c

index b7bc1aac8ed2dc3611c50f24c7c4b53989642661..7556231fb073f919c9f89b97cbddc3e60b169da5 100644 (file)
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -19,6 +19,7 @@
  #include <asm/uaccess.h>
  #include "kvm-s390.h"
  #include "gaccess.h"
+#include "trace-s390.h"
  
  static int psw_extint_disabled(struct kvm_vcpu *vcpu)
  {
@@ -130,6 +131,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         case KVM_S390_INT_EMERGENCY:
                 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
                 vcpu->stat.deliver_emergency_signal++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->emerg.code, 0);
                 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
                 if (rc == -EFAULT)
                         exception = 1;
@@ -152,6 +155,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         case KVM_S390_INT_EXTERNAL_CALL:
                 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
                 vcpu->stat.deliver_external_call++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->extcall.code, 0);
                 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
                 if (rc == -EFAULT)
                         exception = 1;
@@ -175,6 +180,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
                            inti->ext.ext_params);
                 vcpu->stat.deliver_service_signal++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->ext.ext_params, 0);
                 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
                 if (rc == -EFAULT)
                         exception = 1;
@@ -198,6 +205,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
                            inti->ext.ext_params, inti->ext.ext_params2);
                 vcpu->stat.deliver_virtio_interrupt++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->ext.ext_params,
+                                                inti->ext.ext_params2);
                 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
                 if (rc == -EFAULT)
                         exception = 1;
@@ -229,6 +239,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         case KVM_S390_SIGP_STOP:
                 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
                 vcpu->stat.deliver_stop_signal++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                0, 0);
                 __set_intercept_indicator(vcpu, inti);
                 break;
  
@@ -236,12 +248,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
                            inti->prefix.address);
                 vcpu->stat.deliver_prefix_signal++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->prefix.address, 0);
                 kvm_s390_set_prefix(vcpu, inti->prefix.address);
                 break;
  
         case KVM_S390_RESTART:
                 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
                 vcpu->stat.deliver_restart_signal++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                0, 0);
                 rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
                   restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
                 if (rc == -EFAULT)
@@ -259,6 +275,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                            inti->pgm.code,
                            table[vcpu->arch.sie_block->ipa >> 14]);
                 vcpu->stat.deliver_program_int++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                                inti->pgm.code, 0);
                 rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
                 if (rc == -EFAULT)
                         exception = 1;
@@ -515,6 +533,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
         inti->pgm.code = code;
  
         VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
         spin_lock_bh(&li->lock);
         list_add(&inti->list, &li->list);
         atomic_set(&li->active, 1);
@@ -556,6 +575,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
                 kfree(inti);
                 return -EINVAL;
         }
+       trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+                                2);
  
         mutex_lock(&kvm->lock);
         fi = &kvm->arch.float_int;
@@ -621,6 +642,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                 kfree(inti);
                 return -EINVAL;
         }
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
+                                  s390int->parm64, 2);
  
         mutex_lock(&vcpu->kvm->lock);
         li = &vcpu->arch.local_int;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index d470ccbfabae02e015e206f833a93ec0820941fa..e83df7f0fedd08a084e97f43af61a94c7c15d88f 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -32,6 +32,10 @@
  #include "kvm-s390.h"
  #include "gaccess.h"
  
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
  #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  
  struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -242,6 +246,7 @@ out_err:
  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
  {
         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+       trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
         if (!kvm_is_ucontrol(vcpu->kvm)) {
                 clear_bit(63 - vcpu->vcpu_id,
                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -417,6 +422,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                 goto out_free_sie_block;
         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
                  vcpu->arch.sie_block);
+       trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
  
         return vcpu;
  out_free_sie_block:
@@ -607,18 +613,22 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
         local_irq_enable();
         VCPU_EVENT(vcpu, 6, "entering sie flags %x",
                    atomic_read(&vcpu->arch.sie_block->cpuflags));
+       trace_kvm_s390_sie_enter(vcpu,
+                                atomic_read(&vcpu->arch.sie_block->cpuflags));
         rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
         if (rc) {
                 if (kvm_is_ucontrol(vcpu->kvm)) {
                         rc = SIE_INTERCEPT_UCONTROL;
                 } else {
                         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+                       trace_kvm_s390_sie_fault(vcpu);
                         kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
                         rc = 0;
                 }
         }
         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                    vcpu->arch.sie_block->icptcode);
+       trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
         local_irq_disable();
         kvm_guest_exit();
         local_irq_enable();
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c

index 60da903d6f3ecbb596a2fc90352be1fffc137315..ed256fdd7b58040ac9208ea9aa8ff01979bb8334 100644 (file)
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -20,6 +20,7 @@
  #include <asm/sysinfo.h>
  #include "gaccess.h"
  #include "kvm-s390.h"
+#include "trace.h"
  
  static int handle_set_prefix(struct kvm_vcpu *vcpu)
  {
@@ -59,6 +60,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
         kvm_s390_set_prefix(vcpu, address);
  
         VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+       trace_kvm_s390_handle_prefix(vcpu, 1, address);
  out:
         return 0;
  }
@@ -91,6 +93,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
         }
  
         VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+       trace_kvm_s390_handle_prefix(vcpu, 0, address);
  out:
         return 0;
  }
@@ -119,6 +122,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
         }
  
         VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
+       trace_kvm_s390_handle_stap(vcpu, useraddr);
  out:
         return 0;
  }
@@ -164,9 +168,11 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
                            &facility_list, sizeof(facility_list));
         if (rc == -EFAULT)
                 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-       else
+       else {
                 VCPU_EVENT(vcpu, 5, "store facility list value %x",
                            facility_list);
+               trace_kvm_s390_handle_stfl(vcpu, facility_list);
+       }
         return 0;
  }
  
@@ -278,6 +284,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
                 goto out_mem;
         }
+       trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
         free_page(mem);
         vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
         vcpu->run->s.regs.gprs[0] = 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c

index 56f80e1f98f7b1955e23d07bb80497ac3b33dca7..566ddf6e8dfb54290afdb69d706cceadd7b78dbb 100644 (file)
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -18,6 +18,7 @@
  #include <asm/sigp.h>
  #include "gaccess.h"
  #include "kvm-s390.h"
+#include "trace.h"
  
  static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
                         u64 *reg)
@@ -344,6 +345,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
         else
                 parameter = vcpu->run->s.regs.gprs[r1 + 1];
  
+       trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
         switch (order_code) {
         case SIGP_SENSE:
                 vcpu->stat.instruction_sigp_sense++;
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h

new file mode 100644 (file)

index 0000000..90fdf85
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,210 @@
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+           TP_PROTO(unsigned long type),
+           TP_ARGS(type),
+
+           TP_STRUCT__entry(
+                   __field(unsigned long, type)
+                   ),
+
+           TP_fast_assign(
+                   __entry->type = type;
+                   ),
+
+           TP_printk("create vm%s",
+                     __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+       );
+
+/*
+ * Trace points for creation and destruction of vcpus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+           TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+                    struct kvm_s390_sie_block *sie_block),
+           TP_ARGS(id, vcpu, sie_block),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int, id)
+                   __field(struct kvm_vcpu *, vcpu)
+                   __field(struct kvm_s390_sie_block *, sie_block)
+                   ),
+
+           TP_fast_assign(
+                   __entry->id = id;
+                   __entry->vcpu = vcpu;
+                   __entry->sie_block = sie_block;
+                   ),
+
+           TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
+                     __entry->vcpu, __entry->sie_block)
+       );
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+           TP_PROTO(unsigned int id),
+           TP_ARGS(id),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int, id)
+                   ),
+
+           TP_fast_assign(
+                   __entry->id = id;
+                   ),
+
+           TP_printk("destroy cpu %d", __entry->id)
+       );
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type                                              \
+       {KVM_S390_SIGP_STOP, "sigp stop"},                              \
+       {KVM_S390_PROGRAM_INT, "program interrupt"},                    \
+       {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},                  \
+       {KVM_S390_RESTART, "sigp restart"},                             \
+       {KVM_S390_INT_VIRTIO, "virtio interrupt"},                      \
+       {KVM_S390_INT_SERVICE, "sclp interrupt"},                       \
+       {KVM_S390_INT_EMERGENCY, "sigp emergency"},                     \
+       {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+TRACE_EVENT(kvm_s390_inject_vm,
+           TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+           TP_ARGS(type, parm, parm64, who),
+
+           TP_STRUCT__entry(
+                   __field(__u32, inttype)
+                   __field(__u32, parm)
+                   __field(__u64, parm64)
+                   __field(int, who)
+                   ),
+
+           TP_fast_assign(
+                   __entry->inttype = type & 0x00000000ffffffff;
+                   __entry->parm = parm;
+                   __entry->parm64 = parm64;
+                   __entry->who = who;
+                   ),
+
+           TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+                     (__entry->who == 1) ? " (from kernel)" :
+                     (__entry->who == 2) ? " (from user)" : "",
+                     __entry->inttype,
+                     __print_symbolic(__entry->inttype, kvm_s390_int_type),
+                     __entry->parm, __entry->parm64)
+       );
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+           TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
+                    int who),
+           TP_ARGS(id, type, parm, parm64, who),
+
+           TP_STRUCT__entry(
+                   __field(int, id)
+                   __field(__u32, inttype)
+                   __field(__u32, parm)
+                   __field(__u64, parm64)
+                   __field(int, who)
+                   ),
+
+           TP_fast_assign(
+                   __entry->id = id;
+                   __entry->inttype = type & 0x00000000ffffffff;
+                   __entry->parm = parm;
+                   __entry->parm64 = parm64;
+                   __entry->who = who;
+                   ),
+
+           TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+                     (__entry->who == 1) ? " (from kernel)" :
+                     (__entry->who == 2) ? " (from user)" : "",
+                     __entry->id, __entry->inttype,
+                     __print_symbolic(__entry->inttype, kvm_s390_int_type),
+                     __entry->parm, __entry->parm64)
+       );
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+           TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
+           TP_ARGS(id, type, data0, data1),
+
+           TP_STRUCT__entry(
+                   __field(int, id)
+                   __field(__u32, inttype)
+                   __field(__u32, data0)
+                   __field(__u64, data1)
+                   ),
+
+           TP_fast_assign(
+                   __entry->id = id;
+                   __entry->inttype = type & 0x00000000ffffffff;
+                   __entry->data0 = data0;
+                   __entry->data1 = data1;
+                   ),
+
+           TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "      \
+                     "data:%08x %016llx",
+                     __entry->id, __entry->inttype,
+                     __print_symbolic(__entry->inttype, kvm_s390_int_type),
+                     __entry->data0, __entry->data1)
+       );
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+           TP_PROTO(__u64 resets),
+           TP_ARGS(resets),
+
+           TP_STRUCT__entry(
+                   __field(__u64, resets)
+                   ),
+
+           TP_fast_assign(
+                   __entry->resets = resets;
+                   ),
+
+           TP_printk("requesting userspace resets %llx",
+                     __entry->resets)
+       );
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+           TP_PROTO(unsigned int action_bits),
+           TP_ARGS(action_bits),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int, action_bits)
+                   ),
+
+           TP_fast_assign(
+                   __entry->action_bits = action_bits;
+                   ),
+
+           TP_printk("stop request, action_bits = %08x",
+                     __entry->action_bits)
+       );
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h

new file mode 100644 (file)

index 0000000..2b29e62
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,341 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sigp.h>
+#include <asm/debug.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs: the vcpu id and the guest PSW mask and address.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu        /* leading TP_PROTO parameter */
+#define VCPU_ARGS_COMMON vcpu  /* matching leading TP_ARGS argument */
+#define VCPU_FIELD_COMMON __field(int, id)                     \
+       __field(unsigned long, pswmask)                         \
+       __field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do {                                                \
+       __entry->id = vcpu->vcpu_id;                                    \
+       __entry->pswmask = vcpu->arch.sie_block->gpsw.mask;             \
+       __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr;             \
+       } while (0);    /* NOTE(review): the ';' is part of the macro; users add none */
+#define VCPU_TP_PRINTK(p_str, p_args...)                               \
+       TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,           \
+                 __entry->pswmask, __entry->pswaddr, p_args)   /* p_args must be non-empty: the ',' before it is unconditional */
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+           TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+           TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(int, cpuflags)      /* caller-supplied flags value */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->cpuflags = cpuflags;
+                   ),
+
+           VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+       );
+
+TRACE_EVENT(kvm_s390_sie_fault,        /* records only the common vcpu fields */
+           TP_PROTO(VCPU_PROTO_COMMON),
+           TP_ARGS(VCPU_ARGS_COMMON),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   ),
+
+           VCPU_TP_PRINTK("%s", "fault in sie instruction")    /* "%s": VCPU_TP_PRINTK needs at least one argument */
+       );
+
+#define sie_intercept_code                             \
+       {0x04, "Instruction"},                          \
+       {0x08, "Program interruption"},                 \
+       {0x0C, "Instruction and program interruption"}, \
+       {0x10, "External request"},                     \
+       {0x14, "External interruption"},                \
+       {0x18, "I/O request"},                          \
+       {0x1C, "Wait state"},                           \
+       {0x20, "Validity"},                             \
+       {0x28, "Stop request"}  /* value/name pairs consumed by __print_symbolic() in kvm_s390_sie_exit */
+
+TRACE_EVENT(kvm_s390_sie_exit,
+           TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+           TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(u8, icptcode)       /* caller-supplied intercept code */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->icptcode = icptcode;
+                   ),
+
+           VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,      /* NOTE: printed in decimal; the name table is keyed in hex */
+                          __print_symbolic(__entry->icptcode,
+                                           sie_intercept_code))        /* code -> name lookup */
+       );
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+           TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+           TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON
+                   __field(__u64, instruction)
+                   __field(char, insn[8])      /* NOTE(review): array declared through __field(); __array(char, insn, 8) looks intended — confirm */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON
+                   __entry->instruction = ((__u64)ipa << 48) | /* ipa -> bits 63..48 */
+                   ((__u64)ipb << 16);         /* ipb -> bits 47..16; bits 15..0 stay zero */
+                   ),
+
+           VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+                          __entry->instruction,
+                          insn_to_mnemonic((unsigned char *)
+                                           &__entry->instruction,
+                                        __entry->insn) ?       /* NOTE(review): insn is never assigned in TP_fast_assign; presumably filled here, at print time */
+                          "unknown" : __entry->insn)   /* non-zero return selects "unknown" */
+       );
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+           TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+           TP_ARGS(VCPU_ARGS_COMMON, code),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(__u16, code)        /* caller-supplied 16-bit code */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->code = code;
+                   ),
+
+           VCPU_TP_PRINTK("intercepted program interruption %04x",     /* 4 hex digits, no symbolic name */
+                          __entry->code)
+       );
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+           TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+           TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(__u16, viwhy)       /* caller-supplied 16-bit value */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->viwhy = viwhy;
+                   ),
+
+           VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)       /* 4 hex digits */
+       );
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+#define sigp_order_codes                                       \
+       {SIGP_SENSE, "sense"},                                  \
+       {SIGP_EXTERNAL_CALL, "external call"},                  \
+       {SIGP_EMERGENCY_SIGNAL, "emergency signal"},            \
+       {SIGP_STOP, "stop"},                                    \
+       {SIGP_STOP_AND_STORE_STATUS, "stop and store status"},  \
+       {SIGP_SET_ARCHITECTURE, "set architecture"},            \
+       {SIGP_SET_PREFIX, "set prefix"},                        \
+       {SIGP_SENSE_RUNNING, "sense running"},                  \
+       {SIGP_RESTART, "restart"}       /* order/name pairs for __print_symbolic(); SIGP_* presumably come from <asm/sigp.h> */
+
+TRACE_EVENT(kvm_s390_handle_sigp,      /* one event per handled SIGP order */
+           TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+                    __u32 parameter),
+           TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(__u8, order_code)
+                   __field(__u16, cpu_addr)
+                   __field(__u32, parameter)
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->order_code = order_code;
+                   __entry->cpu_addr = cpu_addr;
+                   __entry->parameter = parameter;
+                   ),
+
+           VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+                          "parameter %08x", __entry->order_code,
+                          __print_symbolic(__entry->order_code,
+                                           sigp_order_codes),  /* order -> name lookup */
+                          __entry->cpu_addr, __entry->parameter)
+       );
+
+#define diagnose_codes                         \
+       {0x10, "release pages"},                \
+       {0x44, "time slice end"},               \
+       {0x308, "ipl functions"},               \
+       {0x500, "kvm hypercall"},               \
+       {0x501, "kvm breakpoint"}       /* code/name pairs for __print_symbolic() in kvm_s390_handle_diag */
+
+TRACE_EVENT(kvm_s390_handle_diag,      /* one event per handled diagnose call */
+           TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+           TP_ARGS(VCPU_ARGS_COMMON, code),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(__u16, code)        /* 16 bits cover every code in diagnose_codes */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->code = code;
+                   ),
+
+           VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+                          __print_symbolic(__entry->code, diagnose_codes))     /* code -> name lookup */
+       );
+
+TRACE_EVENT(kvm_s390_handle_lctl,      /* shared by the "lctl" and "lctlg" messages */
+           TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+           TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(int, g)             /* non-zero selects "lctlg" in the message */
+                   __field(int, reg1)
+                   __field(int, reg3)
+                   __field(u64, addr)
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->g = g;
+                   __entry->reg1 = reg1;
+                   __entry->reg3 = reg3;
+                   __entry->addr = addr;
+                   ),
+
+           VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx", /* register numbers in hex */
+                          __entry->g ? "lctlg" : "lctl",
+                          __entry->reg1, __entry->reg3, __entry->addr)
+       );
+
+TRACE_EVENT(kvm_s390_handle_prefix,    /* shared by the "setting" and "storing" messages */
+           TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+           TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(int, set)           /* non-zero: "setting", zero: "storing" */
+                   __field(u32, address)
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->set = set;
+                   __entry->address = address;
+                   ),
+
+           VCPU_TP_PRINTK("%s prefix to %08x",
+                          __entry->set ? "setting" : "storing",
+                          __entry->address)
+       );
+
+TRACE_EVENT(kvm_s390_handle_stap,
+           TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+           TP_ARGS(VCPU_ARGS_COMMON, address),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(u64, address)       /* destination address reported in the message */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->address = address;
+                   ),
+
+           VCPU_TP_PRINTK("storing cpu address to %016llx",    /* 16 hex digits */
+                          __entry->address)
+       );
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+           TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+           TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(unsigned int, facility_list)        /* the value itself, not an address */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->facility_list = facility_list;
+                   ),
+
+           VCPU_TP_PRINTK("store facility list value %08x",    /* 8 hex digits */
+                          __entry->facility_list)
+       );
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+           TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+           TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON           /* id, pswmask, pswaddr */
+                   __field(int, fc)            /* first component of the "fc.sel1.sel2" triple */
+                   __field(int, sel1)
+                   __field(int, sel2)
+                   __field(u64, addr)          /* destination address reported in the message */
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON          /* carries its own ';' */
+                   __entry->fc = fc;
+                   __entry->sel1 = sel1;
+                   __entry->sel2 = sel2;
+                   __entry->addr = addr;
+                   ),
+
+           VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",       /* triple in decimal */
+                          __entry->fc, __entry->sel1, __entry->sel2,
+                          __entry->addr)
+       );
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 09155d64cf7e6a560e3c6a1501e568918d9f2ed9..48e713188469b40ee08566ef74fac216870a4ddd 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
  };
  
  struct kvm_lpage_info {
-       unsigned long rmap_pde;
         int write_count;
  };
  
  struct kvm_arch_memory_slot {
+       unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
         struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
  };
  
@@ -957,6 +957,7 @@ extern bool kvm_rebooting;
  
  #define KVM_ARCH_WANT_MMU_NOTIFIER
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
  int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
  void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig

index a28f338843eaa083a046ea69dca78f6e3ff541fe..45c044f0fff7a8a4862a2bf05bbacfbb76d3d407 100644 (file)
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM
         select TASK_DELAY_ACCT
         select PERF_EVENTS
         select HAVE_KVM_MSI
+       select HAVE_KVM_CPU_RELAX_INTERCEPT
         ---help---
           Support hosting fully virtualized guest machines using hardware
           virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile

index 4f579e8dcacf6747a7e3a34db765bf112233680f..04d30401c5cb26aa2b491ad3b0ceeb8d9fa7e8ff 100644 (file)
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)       += $(addprefix ../../../virt/kvm/, iommu.o)
  kvm-$(CONFIG_KVM_ASYNC_PF)     += $(addprefix ../../../virt/kvm/, async_pf.o)
  
  kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-                          i8254.o timer.o cpuid.o pmu.o
+                          i8254.o cpuid.o pmu.o
  kvm-intel-y            += vmx.o
  kvm-amd-y              += svm.o
  
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c

index 0595f1397b7c0013058afec906e5d329f0d7f7d0..b496da684bd697ce2b50384f310af6d6af26d661 100644 (file)
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         }
         case 7: {
                 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-               /* Mask ebx against host capbability word 9 */
+               /* Mask ebx against host capability word 9 */
                 if (index == 0) {
                         entry->ebx &= kvm_supported_word9_x86_features;
                         cpuid_mask(&entry->ebx, 9);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c

index 97d9a9914ba8d772e522911252261110cfa58677..10f0136f50c125ac96cb1b713d6f7e69e99dfe67 100644 (file)
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
                         if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
                                 goto bad;
                 } else {
-                       /* exapand-down segment */
+                       /* expand-down segment */
                         if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
                                 goto bad;
                         lim = desc.d ? 0xffffffff : 0xffff;
@@ -1166,24 +1166,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
         int rc;
         struct read_cache *mc = &ctxt->mem_read;
  
-       while (size) {
-               int n = min(size, 8u);
-               size -= n;
-               if (mc->pos < mc->end)
-                       goto read_cached;
+       if (mc->pos < mc->end)
+               goto read_cached;
  
-               rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
-                                             &ctxt->exception);
-               if (rc != X86EMUL_CONTINUE)
-                       return rc;
-               mc->end += n;
+       WARN_ON((mc->end + size) >= sizeof(mc->data));
  
-       read_cached:
-               memcpy(dest, mc->data + mc->pos, n);
-               mc->pos += n;
-               dest += n;
-               addr += n;
-       }
+       rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
+                                     &ctxt->exception);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+
+       mc->end += size;
+
+read_cached:
+       memcpy(dest, mc->data + mc->pos, size);
+       mc->pos += size;
         return X86EMUL_CONTINUE;
  }
  
@@ -1383,7 +1380,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         err_code = selector & 0xfffc;
         err_vec = GP_VECTOR;
  
-       /* can't load system descriptor into segment selecor */
+       /* can't load system descriptor into segment selector */
         if (seg <= VCPU_SREG_GS && !seg_desc.s)
                 goto exception;
  
@@ -2038,12 +2035,6 @@ static void
  setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
                         struct desc_struct *cs, struct desc_struct *ss)
  {
-       u16 selector;
-
-       memset(cs, 0, sizeof(struct desc_struct));
-       ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
-       memset(ss, 0, sizeof(struct desc_struct));
-
         cs->l = 0;              /* will be adjusted later */
         set_desc_base(cs, 0);   /* flat segment */
         cs->g = 1;              /* 4kb granularity */
@@ -2053,6 +2044,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
         cs->dpl = 0;            /* will be adjusted later */
         cs->p = 1;
         cs->d = 1;
+       cs->avl = 0;
  
         set_desc_base(ss, 0);   /* flat segment */
         set_desc_limit(ss, 0xfffff);    /* 4GB limit */
@@ -2062,6 +2054,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
         ss->d = 1;              /* 32bit stack segment */
         ss->dpl = 0;
         ss->p = 1;
+       ss->l = 0;
+       ss->avl = 0;
  }
  
  static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
@@ -2398,7 +2392,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
         set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
  
         /*
-        * Now load segment descriptors. If fault happenes at this stage
+        * Now load segment descriptors. If fault happens at this stage
          * it is handled in a context of new task
          */
         ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
@@ -2640,7 +2634,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
          *
          * 1. jmp/call/int to task gate: Check against DPL of the task gate
          * 2. Exception/IRQ/iret: No check is performed
-        * 3. jmp/call to TSS: Check agains DPL of the TSS
+        * 3. jmp/call to TSS: Check against DPL of the TSS
          */
         if (reason == TASK_SWITCH_GATE) {
                 if (idt_index != -1) {
@@ -2681,7 +2675,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
  
         /* set back link to prev task only if NT bit is set in eflags
-          note that old_tss_sel is not used afetr this point */
+          note that old_tss_sel is not used after this point */
         if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
                 old_tss_sel = 0xffff;
  
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c

index adba28f88d1a9d56c45e9716ac8dc8a69958144c..11300d2fa71445ff332cbdff30e7c640b959f02d 100644 (file)
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
         ktime_t remaining;
         struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
  
-       if (!ps->pit_timer.period)
+       if (!ps->period)
                 return 0;
  
         /*
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
          * itself with the initial count and continues counting
          * from there.
          */
-       remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
-       elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
-       elapsed = mod_64(elapsed, ps->pit_timer.period);
+       remaining = hrtimer_get_remaining(&ps->timer);
+       elapsed = ps->period - ktime_to_ns(remaining);
+       elapsed = mod_64(elapsed, ps->period);
  
         return elapsed;
  }
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
         int value;
  
         spin_lock(&ps->inject_lock);
-       value = atomic_dec_return(&ps->pit_timer.pending);
+       value = atomic_dec_return(&ps->pending);
         if (value < 0)
                 /* spurious acks can be generated if, for example, the
                  * PIC is being reset.  Handle it gracefully here
                  */
-               atomic_inc(&ps->pit_timer.pending);
+               atomic_inc(&ps->pending);
         else if (value > 0)
                 /* in this case, we had multiple outstanding pit interrupts
                  * that we needed to inject.  Reinject
@@ -261,28 +261,17 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
         if (!kvm_vcpu_is_bsp(vcpu) || !pit)
                 return;
  
-       timer = &pit->pit_state.pit_timer.timer;
+       timer = &pit->pit_state.timer;
         if (hrtimer_cancel(timer))
                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  }
  
  static void destroy_pit_timer(struct kvm_pit *pit)
  {
-       hrtimer_cancel(&pit->pit_state.pit_timer.timer);
+       hrtimer_cancel(&pit->pit_state.timer);
         flush_kthread_work(&pit->expired);
  }
  
-static bool kpit_is_periodic(struct kvm_timer *ktimer)
-{
-       struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
-                                                pit_timer);
-       return ps->is_periodic;
-}
-
-static struct kvm_timer_ops kpit_ops = {
-       .is_periodic = kpit_is_periodic,
-};
-
  static void pit_do_work(struct kthread_work *work)
  {
         struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
@@ -322,16 +311,16 @@ static void pit_do_work(struct kthread_work *work)
  
  static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
  {
-       struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-       struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+       struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
+       struct kvm_pit *pt = ps->kvm->arch.vpit;
  
-       if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-               atomic_inc(&ktimer->pending);
+       if (ps->reinject || !atomic_read(&ps->pending)) {
+               atomic_inc(&ps->pending);
                 queue_kthread_work(&pt->worker, &pt->expired);
         }
  
-       if (ktimer->t_ops->is_periodic(ktimer)) {
-               hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+       if (ps->is_periodic) {
+               hrtimer_add_expires_ns(&ps->timer, ps->period);
                 return HRTIMER_RESTART;
         } else
                 return HRTIMER_NORESTART;
@@ -340,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
  static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
  {
         struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-       struct kvm_timer *pt = &ps->pit_timer;
         s64 interval;
  
         if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
@@ -351,19 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
         pr_debug("create pit timer, interval is %llu nsec\n", interval);
  
         /* TODO The new value only affected after the retriggered */
-       hrtimer_cancel(&pt->timer);
+       hrtimer_cancel(&ps->timer);
         flush_kthread_work(&ps->pit->expired);
-       pt->period = interval;
+       ps->period = interval;
         ps->is_periodic = is_period;
  
-       pt->timer.function = pit_timer_fn;
-       pt->t_ops = &kpit_ops;
-       pt->kvm = ps->pit->kvm;
+       ps->timer.function = pit_timer_fn;
+       ps->kvm = ps->pit->kvm;
  
-       atomic_set(&pt->pending, 0);
+       atomic_set(&ps->pending, 0);
         ps->irq_ack = 1;
  
-       hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+       hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
                       HRTIMER_MODE_ABS);
  }
  
@@ -639,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
         }
         mutex_unlock(&pit->pit_state.lock);
  
-       atomic_set(&pit->pit_state.pit_timer.pending, 0);
+       atomic_set(&pit->pit_state.pending, 0);
         pit->pit_state.irq_ack = 1;
  }
  
@@ -648,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
         struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
  
         if (!mask) {
-               atomic_set(&pit->pit_state.pit_timer.pending, 0);
+               atomic_set(&pit->pit_state.pending, 0);
                 pit->pit_state.irq_ack = 1;
         }
  }
@@ -706,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
  
         pit_state = &pit->pit_state;
         pit_state->pit = pit;
-       hrtimer_init(&pit_state->pit_timer.timer,
-                    CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
         pit_state->irq_ack_notifier.gsi = 0;
         pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
         kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
-       pit_state->pit_timer.reinject = true;
+       pit_state->reinject = true;
         mutex_unlock(&pit->pit_state.lock);
  
         kvm_pit_reset(pit);
@@ -761,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm)
                 kvm_unregister_irq_ack_notifier(kvm,
                                 &kvm->arch.vpit->pit_state.irq_ack_notifier);
                 mutex_lock(&kvm->arch.vpit->pit_state.lock);
-               timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+               timer = &kvm->arch.vpit->pit_state.timer;
                 hrtimer_cancel(timer);
                 flush_kthread_work(&kvm->arch.vpit->expired);
                 kthread_stop(kvm->arch.vpit->worker_task);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h

index fdf40425ea1de2946bd40e31a1d1a6045ccdcb15..dd1b16b611b0ae6c9d2386a7e690e56a774f0b74 100644 (file)
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -24,8 +24,12 @@ struct kvm_kpit_channel_state {
  struct kvm_kpit_state {
         struct kvm_kpit_channel_state channels[3];
         u32 flags;
-       struct kvm_timer pit_timer;
         bool is_periodic;
+       s64 period;                             /* unit: ns */
+       struct hrtimer timer;
+       atomic_t pending;                       /* accumulated triggered timers */
+       bool reinject;
+       struct kvm *kvm;
         u32    speaker_data_on;
         struct mutex lock;
         struct kvm_pit *pit;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h

index 2086f2bfba33db1d11a119ef57fa82aa01763804..2d03568e9498356716b7504c195c71a912819f4d 100644 (file)
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -70,7 +70,7 @@ struct kvm_pic {
         struct kvm_io_device dev_slave;
         struct kvm_io_device dev_eclr;
         void (*ack_notifier)(void *opaque, int irq);
-       unsigned long irq_states[16];
+       unsigned long irq_states[PIC_NUM_PINS];
  };
  
  struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h

deleted file mode 100644 (file)

index 497dbaa..0000000
--- a/arch/x86/kvm/kvm_timer.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-struct kvm_timer {
-       struct hrtimer timer;
-       s64 period;                             /* unit: ns */
-       u32 timer_mode_mask;
-       u64 tscdeadline;
-       atomic_t pending;                       /* accumulated triggered timers */
-       bool reinject;
-       struct kvm_timer_ops *t_ops;
-       struct kvm *kvm;
-       struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-       bool (*is_periodic)(struct kvm_timer *);
-};
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index ce878788a39fd13e486fd3557011d5bf28a47d2d..0cd431c85d384eb6013406cf320e5e6c8877d4dc 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -696,12 +696,14 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
  
                 val = apic_get_tmcct(apic);
                 break;
-
+       case APIC_PROCPRI:
+               apic_update_ppr(apic);
+               val = apic_get_reg(apic, offset);
+               break;
         case APIC_TASKPRI:
                 report_tpr_access(apic, false);
                 /* fall thru */
         default:
-               apic_update_ppr(apic);
                 val = apic_get_reg(apic, offset);
                 break;
         }
@@ -719,7 +721,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
  {
         unsigned char alignment = offset & 0xf;
         u32 result;
-       /* this bitmask has a bit cleared for each reserver register */
+       /* this bitmask has a bit cleared for each reserved register */
         static const u64 rmask = 0x43ff01ffffffe70cULL;
  
         if ((alignment + len) > 4) {
@@ -792,7 +794,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
         atomic_set(&apic->lapic_timer.pending, 0);
  
         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
-               /* lapic timer in oneshot or peroidic mode */
+               /* lapic timer in oneshot or periodic mode */
                 now = apic->lapic_timer.timer.base->get_time();
                 apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
                             * APIC_BUS_CYCLE_NS * apic->divide_count;
@@ -1212,10 +1214,8 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
   *----------------------------------------------------------------------
   */
  
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
+static bool lapic_is_periodic(struct kvm_lapic *apic)
  {
-       struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
-                                             lapic_timer);
         return apic_lvtt_period(apic);
  }
  
@@ -1251,15 +1251,40 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
                 kvm_apic_local_deliver(apic, APIC_LVT0);
  }
  
-static struct kvm_timer_ops lapic_timer_ops = {
-       .is_periodic = lapic_is_periodic,
-};
-
  static const struct kvm_io_device_ops apic_mmio_ops = {
         .read     = apic_mmio_read,
         .write    = apic_mmio_write,
  };
  
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)        /* hrtimer callback of the lapic timer */
+{
+       struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+       struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+       struct kvm_vcpu *vcpu = apic->vcpu;     /* reached via the two container_of() steps above */
+       wait_queue_head_t *q = &vcpu->wq;
+
+       /*
+        * There is a race window between reading and incrementing "pending",
+        * but a lost timer event is tolerated: this handler only ever bumps
+        * the counter from zero. Note: KVM_REQ_PENDING_TIMER is implicitly
+        * checked in vcpu_enter_guest.
+        */
+       if (!atomic_read(&ktimer->pending)) {
+               atomic_inc(&ktimer->pending);
+               /* FIXME: this code should not know anything about vcpus */
+               kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+       }
+
+       if (waitqueue_active(q))        /* only wake when something waits on the vcpu's queue */
+               wake_up_interruptible(q);
+
+       if (lapic_is_periodic(apic)) {
+               hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); /* push the expiry one period out */
+               return HRTIMER_RESTART;
+       } else
+               return HRTIMER_NORESTART;       /* one-shot: stay disarmed */
+}
+
  int kvm_create_lapic(struct kvm_vcpu *vcpu)
  {
         struct kvm_lapic *apic;
@@ -1283,10 +1308,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
  
         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
                      HRTIMER_MODE_ABS);
-       apic->lapic_timer.timer.function = kvm_timer_fn;
-       apic->lapic_timer.t_ops = &lapic_timer_ops;
-       apic->lapic_timer.kvm = vcpu->kvm;
-       apic->lapic_timer.vcpu = vcpu;
+       apic->lapic_timer.timer.function = apic_timer_fn;
  
         apic->base_address = APIC_DEFAULT_PHYS_BASE;
         vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h

index 4af5405ae1e2f4e2822cde6bd844e73f0489e850..166766fffd9f8fecd4d311c61f7ec0b6fe4ae097 100644 (file)
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,10 +2,17 @@
  #define __KVM_X86_LAPIC_H
  
  #include "iodev.h"
-#include "kvm_timer.h"
  
  #include <linux/kvm_host.h>
  
+struct kvm_timer {
+       struct hrtimer timer;
+       s64 period;                             /* unit: ns */
+       u32 timer_mode_mask;
+       u64 tscdeadline;
+       atomic_t pending;                       /* accumulated triggered timers */
+};
+
  struct kvm_lapic {
         unsigned long base_address;
         struct kvm_io_device dev;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 01ca00423938515cfe43781403e90bfb84929fc3..a9a20528e7001f085e1a141da6efaeef1a9fa854 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
                 return 0;
  
         pfn = spte_to_pfn(old_spte);
+
+       /*
+        * KVM does not hold the refcount of the page used by
+        * kvm mmu, before reclaiming the page, we should
+        * unmap it from mmu first.
+        */
+       WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
         if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
                 kvm_set_pfn_accessed(pfn);
         if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
@@ -960,13 +968,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
                                     struct kvm_memory_slot *slot)
  {
-       struct kvm_lpage_info *linfo;
+       unsigned long idx;
  
         if (likely(level == PT_PAGE_TABLE_LEVEL))
                 return &slot->rmap[gfn - slot->base_gfn];
  
-       linfo = lpage_info_slot(gfn, slot, level);
-       return &linfo->rmap_pde;
+       idx = gfn_to_index(gfn, slot->base_gfn, level);
+       return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
  }
  
  /*
@@ -1200,7 +1208,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
  }
  
  static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                          unsigned long data)
+                          struct kvm_memory_slot *slot, unsigned long data)
  {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1218,7 +1226,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
  }
  
  static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                            unsigned long data)
+                            struct kvm_memory_slot *slot, unsigned long data)
  {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1259,43 +1267,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
         return 0;
  }
  
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                         unsigned long data,
-                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-                                        unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+                               unsigned long start,
+                               unsigned long end,
+                               unsigned long data,
+                               int (*handler)(struct kvm *kvm,
+                                              unsigned long *rmapp,
+                                              struct kvm_memory_slot *slot,
+                                              unsigned long data))
  {
         int j;
-       int ret;
-       int retval = 0;
+       int ret = 0;
         struct kvm_memslots *slots;
         struct kvm_memory_slot *memslot;
  
         slots = kvm_memslots(kvm);
  
         kvm_for_each_memslot(memslot, slots) {
-               unsigned long start = memslot->userspace_addr;
-               unsigned long end;
+               unsigned long hva_start, hva_end;
+               gfn_t gfn_start, gfn_end;
  
-               end = start + (memslot->npages << PAGE_SHIFT);
-               if (hva >= start && hva < end) {
-                       gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-                       gfn_t gfn = memslot->base_gfn + gfn_offset;
+               hva_start = max(start, memslot->userspace_addr);
+               hva_end = min(end, memslot->userspace_addr +
+                                       (memslot->npages << PAGE_SHIFT));
+               if (hva_start >= hva_end)
+                       continue;
+               /*
+                * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+                */
+               gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
  
-                       ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+               for (j = PT_PAGE_TABLE_LEVEL;
+                    j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+                       unsigned long idx, idx_end;
+                       unsigned long *rmapp;
  
-                       for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-                               struct kvm_lpage_info *linfo;
+                       /*
+                        * {idx(page_j) | page_j intersects with
+                        *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+                        */
+                       idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+                       idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
  
-                               linfo = lpage_info_slot(gfn, memslot,
-                                                       PT_DIRECTORY_LEVEL + j);
-                               ret |= handler(kvm, &linfo->rmap_pde, data);
-                       }
-                       trace_kvm_age_page(hva, memslot, ret);
-                       retval |= ret;
+                       rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+                       for (; idx <= idx_end; ++idx)
+                               ret |= handler(kvm, rmapp++, memslot, data);
                 }
         }
  
-       return retval;
+       return ret;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+                         unsigned long data,
+                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                        struct kvm_memory_slot *slot,
+                                        unsigned long data))
+{
+       return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
  }
  
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1303,13 +1335,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
         return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
  }
  
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+       return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
  void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
  {
         kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
  }
  
  static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                        unsigned long data)
+                        struct kvm_memory_slot *slot, unsigned long data)
  {
         u64 *sptep;
         struct rmap_iterator uninitialized_var(iter);
@@ -1323,8 +1360,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
          * This has some overhead, but not as much as the cost of swapping
          * out actively used pages or breaking up actively used hugepages.
          */
-       if (!shadow_accessed_mask)
-               return kvm_unmap_rmapp(kvm, rmapp, data);
+       if (!shadow_accessed_mask) {
+               young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+               goto out;
+       }
  
         for (sptep = rmap_get_first(*rmapp, &iter); sptep;
              sptep = rmap_get_next(&iter)) {
@@ -1336,12 +1375,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                                  (unsigned long *)sptep);
                 }
         }
-
+out:
+       /* @data has hva passed to kvm_age_hva(). */
+       trace_kvm_age_page(data, slot, young);
         return young;
  }
  
  static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                             unsigned long data)
+                             struct kvm_memory_slot *slot, unsigned long data)
  {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1379,13 +1420,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
  
         rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
  
-       kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+       kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
         kvm_flush_remote_tlbs(vcpu->kvm);
  }
  
  int kvm_age_hva(struct kvm *kvm, unsigned long hva)
  {
-       return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+       return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
  }
  
  int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -2472,14 +2513,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
         unsigned long hva;
  
         slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
-       if (!slot) {
-               get_page(fault_page);
-               return page_to_pfn(fault_page);
-       }
+       if (!slot)
+               return get_fault_pfn();
  
         hva = gfn_to_hva_memslot(slot, gfn);
  
-       return hva_to_pfn_atomic(vcpu->kvm, hva);
+       return hva_to_pfn_atomic(hva);
  }
  
  static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
@@ -3236,7 +3275,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
         if (!async)
                 return false; /* *pfn has correct page already */
  
-       put_page(pfn_to_page(*pfn));
+       kvm_release_pfn_clean(*pfn);
  
         if (!prefault && can_do_async_pf(vcpu)) {
                 trace_kvm_try_async_get_page(gva, gfn);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c

index 9b7ec1150ab01ad1390217cc04176d255c5d8382..cfc258a6bf97a1efda8b97bb0ae4fd4bd21a9f23 100644 (file)
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,5 +1,5 @@
  /*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
   *
   * Copyright 2011 Red Hat, Inc. and/or its affiliates.
   *
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index baead950d6c82cfb3ae530c198ab9d022857170f..687d0c30e559884a4da200023dff435b2d041a01 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
         if (svm->nested.intercept & 1ULL) {
                 /*
                  * The #vmexit can't be emulated here directly because this
-                * code path runs with irqs and preemtion disabled. A
+                * code path runs with irqs and preemption disabled. A
                  * #vmexit emulation might sleep. Only signal request for
                  * the #vmexit here.
                  */
@@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
  {
         /*
          * This function merges the msr permission bitmaps of kvm and the
-        * nested vmcb. It is omptimized in that it only merges the parts where
+        * nested vmcb. It is optimized in that it only merges the parts where
          * the kvm msr permission bitmap may contain zero bits
          */
         int i;
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c

deleted file mode 100644 (file)

index 6b85cc6..0000000
--- a/arch/x86/kvm/timer.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * timer support
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/hrtimer.h>
-#include <linux/atomic.h>
-#include "kvm_timer.h"
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-       struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-       struct kvm_vcpu *vcpu = ktimer->vcpu;
-       wait_queue_head_t *q = &vcpu->wq;
-
-       /*
-        * There is a race window between reading and incrementing, but we do
-        * not care about potentially losing timer events in the !reinject
-        * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-        * in vcpu_enter_guest.
-        */
-       if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-               atomic_inc(&ktimer->pending);
-               /* FIXME: this code should not know anything about vcpus */
-               kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-       }
-
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
-
-       if (ktimer->t_ops->is_periodic(ktimer)) {
-               hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-               return HRTIMER_RESTART;
-       } else
-               return HRTIMER_NORESTART;
-}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index c00f03de1b794af8fe65387747813d57ed2885a2..d6e4cbc42b8e8754f4c7822e1b2ad09f3f47946c 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         guest_efer = vmx->vcpu.arch.efer;
  
         /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
          * outside long mode
          */
         ignore_bits = EFER_NX | EFER_SCE;
@@ -3254,7 +3254,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
          * qemu binaries.
          *   IA32 arch specifies that at the time of processor reset the
          * "Accessed" bit in the AR field of segment registers is 1. And qemu
-        * is setting it to 0 in the usedland code. This causes invalid guest
+        * is setting it to 0 in the userland code. This causes invalid guest
          * state vmexit when "unrestricted guest" mode is turned on.
          *    Fix for this setup issue in cpu_reset is being pushed in the qemu
          * tree. Newer qemu binaries with that qemu fix would not need this
@@ -4439,7 +4439,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
         hypercall[2] = 0xc1;
  }
  
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
  {
         if (to_vmx(vcpu)->nested.vmxon &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 42bce48f692850cf3cadf96e83c86e5f8bf760ee..3ca90d74711dc244afe36ca3f01595a1a69b0fda 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
   * kvm-specific. Those are put in the beginning of the list.
   */
  
-#define KVM_SAVE_MSRS_BEGIN    9
+#define KVM_SAVE_MSRS_BEGIN    10
  static u32 msrs_to_save[] = {
         MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
         MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
@@ -1097,7 +1097,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
                  * For each generation, we track the original measured
                  * nanosecond time, offset, and write, so if TSCs are in
                  * sync, we can match exact offset, and if not, we can match
-                * exact software computaion in compute_guest_tsc()
+                * exact software computation in compute_guest_tsc()
                  *
                  * These values are tracked in kvm->arch.cur_xxx variables.
                  */
@@ -1504,7 +1504,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
  {
         gpa_t gpa = data & ~0x3f;
  
-       /* Bits 2:5 are resrved, Should be zero */
+       /* Bits 2:5 are reserved, Should be zero */
         if (data & 0x3c)
                 return 1;
  
@@ -1727,7 +1727,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                  * Ignore all writes to this no longer documented MSR.
                  * Writes are only relevant for old K7 processors,
                  * all pre-dating SVM, but a recommended workaround from
-                * AMD for these chips. It is possible to speicify the
+                * AMD for these chips. It is possible to specify the
                  * affected processor models on the command line, hence
                  * the need to ignore the workaround.
                  */
@@ -2636,7 +2636,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
         if (!vcpu->arch.time_page)
                 return -EINVAL;
         src->flags |= PVCLOCK_GUEST_STOPPED;
-       mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
         return 0;
  }
@@ -3087,7 +3086,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
         if (!kvm->arch.vpit)
                 return -ENXIO;
         mutex_lock(&kvm->arch.vpit->pit_state.lock);
-       kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+       kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
         mutex_unlock(&kvm->arch.vpit->pit_state.lock);
         return 0;
  }
@@ -3170,6 +3169,16 @@ out:
         return r;
  }
  
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+       if (!irqchip_in_kernel(kvm))
+               return -ENXIO;
+
+       irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                       irq_event->irq, irq_event->level);
+       return 0;
+}
+
  long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
  {
@@ -3276,29 +3285,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
         create_pit_unlock:
                 mutex_unlock(&kvm->slots_lock);
                 break;
-       case KVM_IRQ_LINE_STATUS:
-       case KVM_IRQ_LINE: {
-               struct kvm_irq_level irq_event;
-
-               r = -EFAULT;
-               if (copy_from_user(&irq_event, argp, sizeof irq_event))
-                       goto out;
-               r = -ENXIO;
-               if (irqchip_in_kernel(kvm)) {
-                       __s32 status;
-                       status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                                       irq_event.irq, irq_event.level);
-                       if (ioctl == KVM_IRQ_LINE_STATUS) {
-                               r = -EFAULT;
-                               irq_event.status = status;
-                               if (copy_to_user(argp, &irq_event,
-                                                       sizeof irq_event))
-                                       goto out;
-                       }
-                       r = 0;
-               }
-               break;
-       }
         case KVM_GET_IRQCHIP: {
                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
                 struct kvm_irqchip *chip;
@@ -4496,7 +4482,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
  
         /*
          * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-entetr the
+        * and it failed try to unshadow page and re-enter the
          * guest to let CPU execute the instruction.
          */
         if (kvm_mmu_unprotect_page_virt(vcpu, gva))
@@ -5592,7 +5578,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
                 /*
                  * We are here if userspace calls get_regs() in the middle of
                  * instruction emulation. Registers state needs to be copied
-                * back from emulation context to vcpu. Usrapace shouldn't do
+                * back from emulation context to vcpu. Userspace shouldn't do
                  * that usually, but some bad designed PV devices (vmware
                  * backdoor interface) need this to work
                  */
@@ -6121,7 +6107,7 @@ int kvm_arch_hardware_enable(void *garbage)
          * as we reset last_host_tsc on all VCPUs to stop this from being
          * called multiple times (one for each physical CPU bringup).
          *
-        * Platforms with unnreliable TSCs don't have to deal with this, they
+        * Platforms with unreliable TSCs don't have to deal with this, they
          * will be compensated by the logic in vcpu_load, which sets the TSC to
          * catchup mode.  This will catchup all VCPUs to real time, but cannot
          * guarantee that they stay in perfect synchronization.
@@ -6318,6 +6304,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
         int i;
  
         for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+               if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+                       kvm_kvfree(free->arch.rmap_pde[i]);
+                       free->arch.rmap_pde[i] = NULL;
+               }
                 if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
                         kvm_kvfree(free->arch.lpage_info[i]);
                         free->arch.lpage_info[i] = NULL;
@@ -6337,6 +6327,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
                 lpages = gfn_to_index(slot->base_gfn + npages - 1,
                                       slot->base_gfn, level) + 1;
  
+               slot->arch.rmap_pde[i] =
+                       kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+               if (!slot->arch.rmap_pde[i])
+                       goto out_free;
+
                 slot->arch.lpage_info[i] =
                         kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
                 if (!slot->arch.lpage_info[i])
@@ -6365,7 +6360,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
  
  out_free:
         for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+               kvm_kvfree(slot->arch.rmap_pde[i]);
                 kvm_kvfree(slot->arch.lpage_info[i]);
+               slot->arch.rmap_pde[i] = NULL;
                 slot->arch.lpage_info[i] = NULL;
         }
         return -ENOMEM;
@@ -6385,7 +6382,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                 map_flags = MAP_SHARED | MAP_ANONYMOUS;
  
         /*To keep backward compatibility with older userspace,
-        *x86 needs to hanlde !user_alloc case.
+        *x86 needs to handle !user_alloc case.
          */
         if (!user_alloc) {
                 if (npages && !old.rmap) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index b70b48b01098c569b00cebc4a80cc2dbd691cdaf..dbc65f9d6a2b6b32192fd0d25c4bb674aef3bf50 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -183,6 +183,18 @@ struct kvm_vcpu {
         } async_pf;
  #endif
  
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+       /*
+        * Cpu relax intercept or pause loop exit optimization
+        * in_spin_loop: set when a vcpu does a pause loop exit
+        *  or cpu relax intercepted.
+        * dy_eligible: indicates whether vcpu is eligible for directed yield.
+        */
+       struct {
+               bool in_spin_loop;
+               bool dy_eligible;
+       } spin_loop;
+#endif
         struct kvm_vcpu_arch arch;
  };
  
@@ -378,20 +390,11 @@ id_to_memslot(struct kvm_memslots *slots, int id)
         return slot;
  }
  
-#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
-#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
-static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-
  extern struct page *bad_page;
-extern struct page *fault_page;
-
-extern pfn_t bad_pfn;
-extern pfn_t fault_pfn;
  
  int is_error_page(struct page *page);
  int is_error_pfn(pfn_t pfn);
  int is_hwpoison_pfn(pfn_t pfn);
-int is_fault_pfn(pfn_t pfn);
  int is_noslot_pfn(pfn_t pfn);
  int is_invalid_pfn(pfn_t pfn);
  int kvm_is_error_hva(unsigned long addr);
@@ -420,6 +423,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm);
  int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
                             int nr_pages);
  
+struct page *get_bad_page(void);
  struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
  unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
  void kvm_release_page_clean(struct page *page);
@@ -427,20 +431,20 @@ void kvm_release_page_dirty(struct page *page);
  void kvm_set_page_dirty(struct page *page);
  void kvm_set_page_accessed(struct page *page);
  
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t hva_to_pfn_atomic(unsigned long addr);
  pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
  pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
                        bool write_fault, bool *writable);
  pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
  pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
                       bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-                        struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
  void kvm_release_pfn_dirty(pfn_t);
  void kvm_release_pfn_clean(pfn_t pfn);
  void kvm_set_pfn_dirty(pfn_t pfn);
  void kvm_set_pfn_accessed(pfn_t pfn);
  void kvm_get_pfn(pfn_t pfn);
+pfn_t get_fault_pfn(void);
  
  int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
                         int len);
@@ -494,6 +498,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
                                    struct
                                    kvm_userspace_memory_region *mem,
                                    int user_alloc);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
  long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg);
  
@@ -573,7 +578,7 @@ void kvm_arch_sync_events(struct kvm *kvm);
  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
  
-int kvm_is_mmio_pfn(pfn_t pfn);
+bool kvm_is_mmio_pfn(pfn_t pfn);
  
  struct kvm_irq_ack_notifier {
         struct hlist_node link;
@@ -740,6 +745,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
                 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
  }
  
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
+{
+       gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+       return slot->base_gfn + gfn_offset;
+}
+
  static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
                                                gfn_t gfn)
  {
@@ -899,5 +912,32 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
         }
  }
  
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+       vcpu->spin_loop.in_spin_loop = val;
+}
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+       vcpu->spin_loop.dy_eligible = val;
+}
+
+#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+       return true;
+}
+
+#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
  #endif
  
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig

index 28694f4a91398998f569c234c7faee104a945259..d01b24b72c61e75f3225776541c2f99824df443e 100644 (file)
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
  
  config HAVE_KVM_MSI
         bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+       bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c

index 74268b4c2ee167932d514281bf6ecc3187462a3e..79722782d9d7227e5179a92a14833e2f32ce5d1c 100644 (file)
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -112,7 +112,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                                    typeof(*work), link);
                 list_del(&work->link);
                 if (work->page)
-                       put_page(work->page);
+                       kvm_release_page_clean(work->page);
                 kmem_cache_free(async_pf_cache, work);
         }
         spin_unlock(&vcpu->async_pf.lock);
@@ -139,7 +139,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
                 list_del(&work->queue);
                 vcpu->async_pf.queued--;
                 if (work->page)
-                       put_page(work->page);
+                       kvm_release_page_clean(work->page);
                 kmem_cache_free(async_pf_cache, work);
         }
  }
@@ -203,8 +203,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
         if (!work)
                 return -ENOMEM;
  
-       work->page = bad_page;
-       get_page(bad_page);
+       work->page = get_bad_page();
         INIT_LIST_HEAD(&work->queue); /* for list_del to work */
  
         spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c

index e9fff9830bf0bf6f2229603516ebdd633996fd20..c03f1fb26701f026a41fb5eccc194526530f67ea 100644 (file)
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
  static void kvm_iommu_put_pages(struct kvm *kvm,
                                 gfn_t base_gfn, unsigned long npages);
  
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-                          gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+                          unsigned long size)
  {
         gfn_t end_gfn;
         pfn_t pfn;
  
-       pfn     = gfn_to_pfn_memslot(kvm, slot, gfn);
+       pfn     = gfn_to_pfn_memslot(slot, gfn);
         end_gfn = gfn + (size >> PAGE_SHIFT);
         gfn    += 1;
  
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
                 return pfn;
  
         while (gfn < end_gfn)
-               gfn_to_pfn_memslot(kvm, slot, gfn++);
+               gfn_to_pfn_memslot(slot, gfn++);
  
         return pfn;
  }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
                  * Pin all pages we are about to map in memory. This is
                  * important because we unmap and unpin in 4kb steps later.
                  */
-               pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+               pfn = kvm_pin_pages(slot, gfn, page_size);
                 if (is_error_pfn(pfn)) {
                         gfn += 1;
                         continue;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c

index 83402d74a767bec214d2c2467cceb3a271a6be44..7118be0f2f2c804e33e9944a1afb92dce4f72b2b 100644 (file)
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
                 switch (ue->u.irqchip.irqchip) {
                 case KVM_IRQCHIP_PIC_MASTER:
                         e->set = kvm_set_pic_irq;
-                       max_pin = 16;
+                       max_pin = PIC_NUM_PINS;
                         break;
                 case KVM_IRQCHIP_PIC_SLAVE:
                         e->set = kvm_set_pic_irq;
-                       max_pin = 16;
+                       max_pin = PIC_NUM_PINS;
                         delta = 8;
                         break;
                 case KVM_IRQCHIP_IOAPIC:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 246852397e301ee86ac5c40f653e4695eae659a8..bcf973ec98ff43e3f882830dd64b3f33218e37c2 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,14 +100,11 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
  
  static bool largepages_enabled = true;
  
-static struct page *hwpoison_page;
-static pfn_t hwpoison_pfn;
-
-struct page *fault_page;
-pfn_t fault_pfn;
-
-inline int kvm_is_mmio_pfn(pfn_t pfn)
+bool kvm_is_mmio_pfn(pfn_t pfn)
  {
+       if (is_error_pfn(pfn))
+               return false;
+
         if (pfn_valid(pfn)) {
                 int reserved;
                 struct page *tail = pfn_to_page(pfn);
@@ -236,6 +233,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         }
         vcpu->run = page_address(page);
  
+       kvm_vcpu_set_in_spin_loop(vcpu, false);
+       kvm_vcpu_set_dy_eligible(vcpu, false);
+
         r = kvm_arch_vcpu_init(vcpu);
         if (r < 0)
                 goto fail_free_run;
@@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
          * count is also read inside the mmu_lock critical section.
          */
         kvm->mmu_notifier_count++;
-       for (; start < end; start += PAGE_SIZE)
-               need_tlb_flush |= kvm_unmap_hva(kvm, start);
+       need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
         need_tlb_flush |= kvm->tlbs_dirty;
         /* we've to flush the tlb before the pages can be freed */
         if (need_tlb_flush)
@@ -934,40 +933,55 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
  
  int is_error_page(struct page *page)
  {
-       return page == bad_page || page == hwpoison_page || page == fault_page;
+       return IS_ERR(page);
  }
  EXPORT_SYMBOL_GPL(is_error_page);
  
  int is_error_pfn(pfn_t pfn)
  {
-       return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
+       return IS_ERR_VALUE(pfn);
  }
  EXPORT_SYMBOL_GPL(is_error_pfn);
  
-int is_hwpoison_pfn(pfn_t pfn)
+static pfn_t get_bad_pfn(void)
  {
-       return pfn == hwpoison_pfn;
+       return -ENOENT;
  }
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
  
-int is_fault_pfn(pfn_t pfn)
+pfn_t get_fault_pfn(void)
  {
-       return pfn == fault_pfn;
+       return -EFAULT;
  }
-EXPORT_SYMBOL_GPL(is_fault_pfn);
+EXPORT_SYMBOL_GPL(get_fault_pfn);
+
+static pfn_t get_hwpoison_pfn(void)
+{
+       return -EHWPOISON;
+}
+
+int is_hwpoison_pfn(pfn_t pfn)
+{
+       return pfn == -EHWPOISON;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
  
  int is_noslot_pfn(pfn_t pfn)
  {
-       return pfn == bad_pfn;
+       return pfn == -ENOENT;
  }
  EXPORT_SYMBOL_GPL(is_noslot_pfn);
  
  int is_invalid_pfn(pfn_t pfn)
  {
-       return pfn == hwpoison_pfn || pfn == fault_pfn;
+       return !is_noslot_pfn(pfn) && is_error_pfn(pfn);
  }
  EXPORT_SYMBOL_GPL(is_invalid_pfn);
  
+struct page *get_bad_page(void)
+{
+       return ERR_PTR(-ENOENT);
+}
+
  static inline unsigned long bad_hva(void)
  {
         return PAGE_OFFSET;
@@ -1039,12 +1053,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
  }
  EXPORT_SYMBOL_GPL(gfn_to_hva);
  
-static pfn_t get_fault_pfn(void)
-{
-       get_page(fault_page);
-       return fault_pfn;
-}
-
  int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
         unsigned long start, int write, struct page **page)
  {
@@ -1065,8 +1073,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
         return rc == -EHWPOISON;
  }
  
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-                       bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+                       bool write_fault, bool *writable)
  {
         struct page *page[1];
         int npages = 0;
@@ -1122,8 +1130,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
                 if (npages == -EHWPOISON ||
                         (!async && check_user_page_hwpoison(addr))) {
                         up_read(&current->mm->mmap_sem);
-                       get_page(hwpoison_page);
-                       return page_to_pfn(hwpoison_page);
+                       return get_hwpoison_pfn();
                 }
  
                 vma = find_vma_intersection(current->mm, addr, addr+1);
@@ -1146,9 +1153,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
         return pfn;
  }
  
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
  {
-       return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+       return hva_to_pfn(addr, true, NULL, true, NULL);
  }
  EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
  
@@ -1161,12 +1168,10 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
                 *async = false;
  
         addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr)) {
-               get_page(bad_page);
-               return page_to_pfn(bad_page);
-       }
+       if (kvm_is_error_hva(addr))
+               return get_bad_pfn();
  
-       return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+       return hva_to_pfn(addr, atomic, async, write_fault, writable);
  }
  
  pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,11 +1200,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
  }
  EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
  
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-                        struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
  {
         unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-       return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+       return hva_to_pfn(addr, false, NULL, true, NULL);
  }
  
  int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1219,37 +1223,45 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
  }
  EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
  
+static struct page *kvm_pfn_to_page(pfn_t pfn)
+{
+       WARN_ON(kvm_is_mmio_pfn(pfn));
+
+       if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn))
+               return get_bad_page();
+
+       return pfn_to_page(pfn);
+}
+
  struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
  {
         pfn_t pfn;
  
         pfn = gfn_to_pfn(kvm, gfn);
-       if (!kvm_is_mmio_pfn(pfn))
-               return pfn_to_page(pfn);
-
-       WARN_ON(kvm_is_mmio_pfn(pfn));
  
-       get_page(bad_page);
-       return bad_page;
+       return kvm_pfn_to_page(pfn);
  }
  
  EXPORT_SYMBOL_GPL(gfn_to_page);
  
  void kvm_release_page_clean(struct page *page)
  {
-       kvm_release_pfn_clean(page_to_pfn(page));
+       if (!is_error_page(page))
+               kvm_release_pfn_clean(page_to_pfn(page));
  }
  EXPORT_SYMBOL_GPL(kvm_release_page_clean);
  
  void kvm_release_pfn_clean(pfn_t pfn)
  {
-       if (!kvm_is_mmio_pfn(pfn))
+       if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
                 put_page(pfn_to_page(pfn));
  }
  EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
  
  void kvm_release_page_dirty(struct page *page)
  {
+       WARN_ON(is_error_page(page));
+
         kvm_release_pfn_dirty(page_to_pfn(page));
  }
  EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
@@ -1580,6 +1592,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ *  (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ *  (preempted lock holder), indicated by @in_spin_loop.
+ *  Set at the beginning and cleared at the end of interception/PLE handler.
+ *
+ *  (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ *  chance last time (mostly it has become eligible now since we have probably
+ *  yielded to lockholder in last iteration. This is done by toggling
+ *  @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ *  Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ *  to preempted lock-holder could result in wrong VCPU selection and CPU
+ *  burning. Giving priority for a potential lock-holder increases lock
+ *  progress.
+ *
+ *  Since the algorithm is based on heuristics, accessing another VCPU's data
+ *  without locking does no harm. It may result in trying to yield to the same
+ *  VCPU, failing, and continuing with the next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+       bool eligible;
+
+       eligible = !vcpu->spin_loop.in_spin_loop ||
+                       (vcpu->spin_loop.in_spin_loop &&
+                        vcpu->spin_loop.dy_eligible);
+
+       if (vcpu->spin_loop.in_spin_loop)
+               kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+       return eligible;
+}
+#endif
  void kvm_vcpu_on_spin(struct kvm_vcpu *me)
  {
         struct kvm *kvm = me->kvm;
@@ -1589,6 +1638,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
         int pass;
         int i;
  
+       kvm_vcpu_set_in_spin_loop(me, true);
         /*
          * We boost the priority of a VCPU that is runnable but not
          * currently running, because it got preempted by something
@@ -1607,6 +1657,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                 continue;
                         if (waitqueue_active(&vcpu->wq))
                                 continue;
+                       if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+                               continue;
                         if (kvm_vcpu_yield_to(vcpu)) {
                                 kvm->last_boosted_vcpu = i;
                                 yielded = 1;
@@ -1614,6 +1666,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                         }
                 }
         }
+       kvm_vcpu_set_in_spin_loop(me, false);
+
+       /* Ensure vcpu is not eligible during next spinloop */
+       kvm_vcpu_set_dy_eligible(me, false);
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
  
@@ -2092,6 +2148,29 @@ static long kvm_vm_ioctl(struct file *filp,
                 r = kvm_send_userspace_msi(kvm, &msi);
                 break;
         }
+#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+       case KVM_IRQ_LINE_STATUS:
+       case KVM_IRQ_LINE: {
+               struct kvm_irq_level irq_event;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq_event, argp, sizeof irq_event))
+                       goto out;
+
+               r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+               if (r)
+                       goto out;
+
+               r = -EFAULT;
+               if (ioctl == KVM_IRQ_LINE_STATUS) {
+                       if (copy_to_user(argp, &irq_event, sizeof irq_event))
+                               goto out;
+               }
+
+               r = 0;
+               break;
+       }
  #endif
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
@@ -2697,9 +2776,6 @@ static struct syscore_ops kvm_syscore_ops = {
         .resume = kvm_resume,
  };
  
-struct page *bad_page;
-pfn_t bad_pfn;
-
  static inline
  struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
  {
@@ -2731,33 +2807,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
         if (r)
                 goto out_fail;
  
-       bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-       if (bad_page == NULL) {
-               r = -ENOMEM;
-               goto out;
-       }
-
-       bad_pfn = page_to_pfn(bad_page);
-
-       hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-       if (hwpoison_page == NULL) {
-               r = -ENOMEM;
-               goto out_free_0;
-       }
-
-       hwpoison_pfn = page_to_pfn(hwpoison_page);
-
-       fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-       if (fault_page == NULL) {
-               r = -ENOMEM;
-               goto out_free_0;
-       }
-
-       fault_pfn = page_to_pfn(fault_page);
-
         if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                 r = -ENOMEM;
                 goto out_free_0;
@@ -2832,12 +2881,6 @@ out_free_1:
  out_free_0a:
         free_cpumask_var(cpus_hardware_enabled);
  out_free_0:
-       if (fault_page)
-               __free_page(fault_page);
-       if (hwpoison_page)
-               __free_page(hwpoison_page);
-       __free_page(bad_page);
-out:
         kvm_arch_exit();
  out_fail:
         return r;
@@ -2857,8 +2900,5 @@ void kvm_exit(void)
         kvm_arch_hardware_unsetup();
         kvm_arch_exit();
         free_cpumask_var(cpus_hardware_enabled);
-       __free_page(fault_page);
-       __free_page(hwpoison_page);
-       __free_page(bad_page);
  }
  EXPORT_SYMBOL_GPL(kvm_exit);
author	Avi Kivity <avi@redhat.com>
	Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)
committer	Avi Kivity <avi@redhat.com>
	Sun, 5 Aug 2012 10:25:10 +0000 (13:25 +0300)
arch/ia64/kvm/kvm-ia64.c		patch \| blob \| blame \| history
arch/powerpc/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_64_mmu_hv.c		patch \| blob \| blame \| history
arch/powerpc/kvm/e500_tlb.c		patch \| blob \| blame \| history
arch/s390/include/asm/processor.h		patch \| blob \| blame \| history
arch/s390/kernel/dis.c		patch \| blob \| blame \| history
arch/s390/kvm/Kconfig		patch \| blob \| blame \| history
arch/s390/kvm/diag.c		patch \| blob \| blame \| history
arch/s390/kvm/intercept.c		patch \| blob \| blame \| history
arch/s390/kvm/interrupt.c		patch \| blob \| blame \| history
arch/s390/kvm/kvm-s390.c		patch \| blob \| blame \| history
arch/s390/kvm/priv.c		patch \| blob \| blame \| history
arch/s390/kvm/sigp.c		patch \| blob \| blame \| history
arch/s390/kvm/trace-s390.h	[new file with mode: 0644]	patch \| blob
arch/s390/kvm/trace.h	[new file with mode: 0644]	patch \| blob
arch/x86/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/x86/kvm/Kconfig		patch \| blob \| blame \| history
arch/x86/kvm/Makefile		patch \| blob \| blame \| history
arch/x86/kvm/cpuid.c		patch \| blob \| blame \| history
arch/x86/kvm/emulate.c		patch \| blob \| blame \| history
arch/x86/kvm/i8254.c		patch \| blob \| blame \| history
arch/x86/kvm/i8254.h		patch \| blob \| blame \| history
arch/x86/kvm/irq.h		patch \| blob \| blame \| history
arch/x86/kvm/kvm_timer.h	[deleted file]	patch \| blob \| blame \| history
arch/x86/kvm/lapic.c		patch \| blob \| blame \| history
arch/x86/kvm/lapic.h		patch \| blob \| blame \| history
arch/x86/kvm/mmu.c		patch \| blob \| blame \| history
arch/x86/kvm/pmu.c		patch \| blob \| blame \| history
arch/x86/kvm/svm.c		patch \| blob \| blame \| history
arch/x86/kvm/timer.c	[deleted file]	patch \| blob \| blame \| history
arch/x86/kvm/vmx.c		patch \| blob \| blame \| history
arch/x86/kvm/x86.c		patch \| blob \| blame \| history
include/linux/kvm_host.h		patch \| blob \| blame \| history
virt/kvm/Kconfig		patch \| blob \| blame \| history
virt/kvm/async_pf.c		patch \| blob \| blame \| history
virt/kvm/iommu.c		patch \| blob \| blame \| history
virt/kvm/irq_comm.c		patch \| blob \| blame \| history
virt/kvm/kvm_main.c		patch \| blob \| blame \| history