KVM: PPC: VFIO: Add in-kernel acceleration for VFIO
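For context, a minimal userspace sketch (not part of this diff) of how the acceleration is expected to be enabled: a VFIO group fd is linked to an in-kernel sPAPR TCE table fd through the KVM VFIO pseudo-device, using the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE attribute and struct kvm_vfio_spapr_tce added elsewhere in this series. The helper name and error handling below are illustrative only.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /*
     * Hypothetical helper: tell KVM that a VFIO group is backed by a given
     * in-kernel sPAPR TCE table, so H_PUT_TCE, H_PUT_TCE_INDIRECT and
     * H_STUFF_TCE can be handled without exiting to userspace.
     */
    static int kvm_vfio_attach_spapr_tce(int vmfd, int groupfd, int tablefd)
    {
            struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
            struct kvm_vfio_spapr_tce param = {
                    .groupfd = groupfd,   /* fd of /dev/vfio/<group> */
                    .tablefd = tablefd,   /* fd from KVM_CREATE_SPAPR_TCE_64 */
            };
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_VFIO_GROUP,
                    .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                    .addr = (__u64)(unsigned long)&param,
            };

            if (ioctl(vmfd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;
            return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
    }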
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e4c4ea973e57892ccae032eba7218d107fee6cd8..2325fc7c8db63d33efdc998e191ab5cd3bca0847 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
 #include <asm/iommu.h>
 #include <asm/tce.h>
 
+#ifdef CONFIG_BUG
+
+#define WARN_ON_ONCE_RM(condition)     ({                      \
+       static bool __section(.data.unlikely) __warned;         \
+       int __ret_warn_once = !!(condition);                    \
+                                                               \
+       if (unlikely(__ret_warn_once && !__warned)) {           \
+               __warned = true;                                \
+               pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n",      \
+                               __stringify(condition),         \
+                               __func__, __LINE__);            \
+               dump_stack();                                   \
+       }                                                       \
+       unlikely(__ret_warn_once);                              \
+})
+
+#else
+
+#define WARN_ON_ONCE_RM(condition) ({                          \
+       int __ret_warn_on = !!(condition);                      \
+       unlikely(__ret_warn_on);                                \
+})
+
+#endif
+
 #define TCES_PER_PAGE  (PAGE_SIZE / sizeof(u64))
 
 /*
  * WARNING: This will be called in real or virtual mode on HV KVM and virtual
  *          mode on PR KVM
  */
-struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
                unsigned long liobn)
 {
-       struct kvm *kvm = vcpu->kvm;
        struct kvmppc_spapr_tce_table *stt;
 
        list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
@@ -179,15 +203,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+       unsigned long hpa = 0;
+       enum dma_data_direction dir = DMA_NONE;
+
+       iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       struct mm_iommu_table_group_mem_t *mem = NULL;
+       const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       pua = (void *) vmalloc_to_phys(pua);
+       if (WARN_ON_ONCE_RM(!pua))
+               return H_HARDWARE;
+
+       mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+       if (!mem)
+               return H_TOO_HARD;
+
+       mm_iommu_mapped_dec(mem);
+
+       *pua = 0;
+
+       return H_SUCCESS;
+}
+
+static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       enum dma_data_direction dir = DMA_NONE;
+       unsigned long hpa = 0;
+       long ret;
+
+       if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+               /*
+                * real mode xchg can fail if struct page crosses
+                * a page boundary
+                */
+               return H_TOO_HARD;
+
+       if (dir == DMA_NONE)
+               return H_SUCCESS;
+
+       ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+       if (ret)
+               iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+
+       return ret;
+}
+
+static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+               unsigned long entry, unsigned long ua,
+               enum dma_data_direction dir)
+{
+       long ret;
+       unsigned long hpa = 0;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+       struct mm_iommu_table_group_mem_t *mem;
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+       if (!mem)
+               return H_TOO_HARD;
+
+       if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+               return H_HARDWARE;
+
+       pua = (void *) vmalloc_to_phys(pua);
+       if (WARN_ON_ONCE_RM(!pua))
+               return H_HARDWARE;
+
+       if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+               return H_CLOSED;
+
+       ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+       if (ret) {
+               mm_iommu_mapped_dec(mem);
+               /*
+                * real mode xchg can fail if struct page crosses
+                * a page boundary
+                */
+               return H_TOO_HARD;
+       }
+
+       if (dir != DMA_NONE)
+               kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+       *pua = ua;
+
+       return 0;
+}
+
 long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                unsigned long ioba, unsigned long tce)
 {
-       struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+       struct kvmppc_spapr_tce_table *stt;
        long ret;
+       struct kvmppc_spapr_tce_iommu_table *stit;
+       unsigned long entry, ua = 0;
+       enum dma_data_direction dir;
 
        /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
        /*          liobn, ioba, tce); */
 
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -199,7 +330,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        if (ret != H_SUCCESS)
                return ret;
 
-       kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+       dir = iommu_tce_direction(tce);
+       if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+                       tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+               return H_PARAMETER;
+
+       entry = ioba >> stt->page_shift;
+
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               if (dir == DMA_NONE)
+                       ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry);
+               else
+                       ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+                                       stit->tbl, entry, ua, dir);
+
+               if (ret == H_SUCCESS)
+                       continue;
+
+               if (ret == H_TOO_HARD)
+                       return ret;
+
+               WARN_ON_ONCE_RM(1);
+               kvmppc_rm_clear_tce(stit->tbl, entry);
+       }
+
+       kvmppc_tce_put(stt, entry, tce);
 
        return H_SUCCESS;
 }
@@ -239,8 +395,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        long i, ret = H_SUCCESS;
        unsigned long tces, entry, ua = 0;
        unsigned long *rmap = NULL;
+       bool prereg = false;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -259,23 +417,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        if (ret != H_SUCCESS)
                return ret;
 
-       if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
-               return H_TOO_HARD;
+       if (mm_iommu_preregistered(vcpu->kvm->mm)) {
+               /*
+                * We get here if guest memory was pre-registered, which is
+                * normally the VFIO case, so the gpa->hpa translation does
+                * not depend on the HPT.
+                */
+               struct mm_iommu_table_group_mem_t *mem;
 
-       rmap = (void *) vmalloc_to_phys(rmap);
+               if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+                       return H_TOO_HARD;
 
-       /*
-        * Synchronize with the MMU notifier callbacks in
-        * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
-        * While we have the rmap lock, code running on other CPUs
-        * cannot finish unmapping the host real page that backs
-        * this guest real page, so we are OK to access the host
-        * real page.
-        */
-       lock_rmap(rmap);
-       if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
-               ret = H_TOO_HARD;
-               goto unlock_exit;
+               mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
+               if (mem)
+                       prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+       }
+
+       if (!prereg) {
+               /*
+                * This is usually the case of a guest with emulated devices
+                * only, when the TCE list is not in preregistered memory.
+                * We do not require memory to be preregistered in this case,
+                * so lock the rmap and use __find_linux_pte_or_hugepte().
+                */
+               if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+                       return H_TOO_HARD;
+
+               rmap = (void *) vmalloc_to_phys(rmap);
+               if (WARN_ON_ONCE_RM(!rmap))
+                       return H_HARDWARE;
+
+               /*
+                * Synchronize with the MMU notifier callbacks in
+                * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+                * While we have the rmap lock, code running on other CPUs
+                * cannot finish unmapping the host real page that backs
+                * this guest real page, so we are OK to access the host
+                * real page.
+                */
+               lock_rmap(rmap);
+               if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+                       ret = H_TOO_HARD;
+                       goto unlock_exit;
+               }
        }
 
        for (i = 0; i < npages; ++i) {
@@ -285,11 +469,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                if (ret != H_SUCCESS)
                        goto unlock_exit;
 
+               ua = 0;
+               if (kvmppc_gpa_to_ua(vcpu->kvm,
+                               tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+                               &ua, NULL)) {
+                       ret = H_PARAMETER;
+                       goto unlock_exit;
+               }
+
+               list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+                       ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+                                       stit->tbl, entry + i, ua,
+                                       iommu_tce_direction(tce));
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               goto unlock_exit;
+
+                       WARN_ON_ONCE_RM(1);
+                       kvmppc_rm_clear_tce(stit->tbl, entry + i);
+               }
+
                kvmppc_tce_put(stt, entry + i, tce);
        }
 
 unlock_exit:
-       unlock_rmap(rmap);
+       if (rmap)
+               unlock_rmap(rmap);
 
        return ret;
 }
@@ -300,8 +506,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 {
        struct kvmppc_spapr_tce_table *stt;
        long i, ret;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -313,6 +520,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
        if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
                return H_PARAMETER;
 
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+               for (i = 0; i < npages; ++i) {
+                       ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry + i);
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               return ret;
+
+                       WARN_ON_ONCE_RM(1);
+                       kvmppc_rm_clear_tce(stit->tbl, entry + i);
+               }
+       }
+
        for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
                kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
@@ -322,12 +547,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                      unsigned long ioba)
 {
-       struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+       struct kvmppc_spapr_tce_table *stt;
        long ret;
        unsigned long idx;
        struct page *page;
        u64 *tbl;
 
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
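For reference, the hcalls these real-mode handlers service come from the guest's IOMMU code; a pseries Linux guest programs a single TCE roughly as in the sketch below (the standard plpar_hcall_norets()/H_PUT_TCE interface, not part of this patch; the function name is made up for illustration).

    /* Guest-side sketch: map or clear one TCE in the table named by 'liobn'. */
    #include <asm/hvcall.h>
    #include <asm/plpar_wrappers.h>

    static long guest_put_tce(unsigned long liobn, unsigned long ioba,
                              unsigned long tce)
    {
            /*
             * With this patch the hypervisor can complete the hcall in
             * real mode, updating both the guest view of the table and
             * the hardware IOMMU table used by the VFIO-assigned device.
             */
            return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tce);
    }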