/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

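/*
 * kvmppc_tce_pages() returns the number of host pages needed to store the
 * guest-visible TCE table (one u64 per IOMMU page). For example, a 2GB DMA
 * window backed by 4K IOMMU pages holds 512K TCEs, i.e. 4MB or 1024 pages.
 * kvmppc_stt_pages() adds the pages occupied by the kvmppc_spapr_tce_table
 * descriptor itself (including its page pointer array), so the total can
 * be charged against RLIMIT_MEMLOCK below.
 */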
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
			(tce_pages * sizeof(struct page *));

	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}

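/*
 * Charge (inc == true) or uncharge (inc == false) the given number of
 * pages to the current process's locked_vm. Creating a table fails with
 * -ENOMEM if the new total would exceed RLIMIT_MEMLOCK and the caller
 * lacks CAP_IPC_LOCK.
 */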
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
	long ret = 0;

	if (!current || !current->mm)
		return ret; /* process exited */

	down_write(&current->mm->mmap_sem);

	if (inc) {
		unsigned long locked, lock_limit;

		locked = current->mm->locked_vm + stt_pages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += stt_pages;
	} else {
		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
			stt_pages = current->mm->locked_vm;

		current->mm->locked_vm -= stt_pages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
			inc ? '+' : '-',
			stt_pages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

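/*
 * RCU callback: runs once all lockless walkers of stt->iommu_tables are
 * done with this entry, so the iommu_table reference can be dropped and
 * the entry freed.
 */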
static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
			struct kvmppc_spapr_tce_iommu_table, rcu);

	iommu_tce_table_put(stit->tbl);

	kfree(stit);
}

static void kvm_spapr_tce_liobn_put(struct kref *kref)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
			struct kvmppc_spapr_tce_iommu_table, kref);

	list_del_rcu(&stit->next);

	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
}

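/*
 * Called when an IOMMU group is detached from the VM: drops the reference
 * taken by kvm_spapr_tce_attach_iommu_group() on every table of this group
 * that is linked to a registered LIOBN.
 */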
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
		struct iommu_group *grp)
{
	int i;
	struct kvmppc_spapr_tce_table *stt;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct iommu_table_group *table_group = NULL;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {

		table_group = iommu_group_get_iommudata(grp);
		if (WARN_ON(!table_group))
			continue;

		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
				if (table_group->tables[i] != stit->tbl)
					continue;

				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
				return;
			}
		}
	}
}

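/*
 * Associates an IOMMU table of the group with a LIOBN: tablefd is the
 * "kvm-spapr-tce" file descriptor identifying the kvmppc_spapr_tce_table,
 * and the matching hardware table gets a kref-counted entry on
 * stt->iommu_tables which the H_PUT_TCE handlers walk locklessly.
 */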
extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
		struct iommu_group *grp)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	bool found = false;
	struct iommu_table *tbl = NULL;
	struct iommu_table_group *table_group;
	long i;
	struct kvmppc_spapr_tce_iommu_table *stit;
	struct fd f;

	f = fdget(tablefd);
	if (!f.file)
		return -EBADF;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt == f.file->private_data) {
			found = true;
			break;
		}
	}

	fdput(f);

	if (!found)
		return -EINVAL;

	table_group = iommu_group_get_iommudata(grp);
	if (WARN_ON(!table_group))
		return -EFAULT;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbltmp = table_group->tables[i];

		if (!tbltmp)
			continue;
		/*
		 * Make sure hardware table parameters are exactly the same;
		 * this is used in the TCE handlers where boundary checks
		 * use only the first attached table.
		 */
		if ((tbltmp->it_page_shift == stt->page_shift) &&
				(tbltmp->it_offset == stt->offset) &&
				(tbltmp->it_size == stt->size)) {
			/*
			 * Reference the table to avoid races with
			 * add/remove DMA windows.
			 */
			tbl = iommu_tce_table_get(tbltmp);
			break;
		}
	}
	if (!tbl)
		return -EINVAL;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		if (tbl != stit->tbl)
			continue;

		if (!kref_get_unless_zero(&stit->kref)) {
			/* stit is being destroyed */
			iommu_tce_table_put(tbl);
			return -ENOTTY;
		}
		/*
		 * The table is already known to this KVM, we just increased
		 * its KVM reference counter and can return.
		 */
		return 0;
	}

	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
	if (!stit) {
		iommu_tce_table_put(tbl);
		return -ENOMEM;
	}

	stit->tbl = tbl;
	kref_init(&stit->kref);

	list_add_rcu(&stit->next, &stt->iommu_tables);

	return 0;
}

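/*
 * RCU callback freeing the pages which back the guest view of the TCE
 * table, and then the descriptor itself.
 */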
static void release_spapr_tce_table(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_table *stt = container_of(head,
			struct kvmppc_spapr_tce_table, rcu);
	unsigned long i, npages = kvmppc_tce_pages(stt->size);

	for (i = 0; i < npages; i++)
		__free_page(stt->pages[i]);

	kfree(stt);
}

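/*
 * Page fault handler for userspace mmap() of the TCE table fd: hands out
 * the backing pages allocated in kvm_vm_ioctl_create_spapr_tce().
 */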
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;

	list_del_rcu(&stt->list);

	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
		while (1) {
			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
				break;
		}
	}

	kvm_put_kvm(stt->kvm);

	kvmppc_account_memlimit(
		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
	call_rcu(&stt->rcu, release_spapr_tce_table);

	return 0;
}

static const struct file_operations kvm_spapr_tce_fops = {
	.mmap		= kvm_spapr_tce_mmap,
	.release	= kvm_spapr_tce_release,
};

long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce_64 *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	unsigned long npages, size;
	int ret = -ENOMEM;
	int i;

	if (!args->size)
		return -EINVAL;

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt->liobn == args->liobn)
			return -EBUSY;
	}

	size = args->size;
	npages = kvmppc_tce_pages(size);
	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
	if (ret) {
		stt = NULL;
		goto fail;
	}

	ret = -ENOMEM;
	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail;

	stt->liobn = args->liobn;
	stt->page_shift = args->page_shift;
	stt->offset = args->offset;
	stt->size = size;
	stt->kvm = kvm;
	INIT_LIST_HEAD_RCU(&stt->iommu_tables);

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);
	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR | O_CLOEXEC);

fail:
	if (stt) {
		for (i = 0; i < npages; i++)
			if (stt->pages[i])
				__free_page(stt->pages[i]);

		kfree(stt);
	}
	return ret;
}

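/*
 * Invalidate a single hardware TCE after a partial failure so the device
 * cannot keep using a half-updated entry.
 */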
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{
	unsigned long hpa = 0;
	enum dma_data_direction dir = DMA_NONE;

	iommu_tce_xchg(tbl, entry, &hpa, &dir);
}

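/*
 * Drop the "mapped" count on the preregistered memory region which backs
 * this entry and forget the userspace address it was mapped at.
 */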
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
	if (!mem)
		return H_TOO_HARD;

	mm_iommu_mapped_dec(mem);

	*pua = 0;

	return H_SUCCESS;
}

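/*
 * Clear one hardware TCE and, if it carried a valid mapping, release the
 * associated preregistered-memory reference; the old entry is put back
 * if that release fails.
 */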
static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	enum dma_data_direction dir = DMA_NONE;
	unsigned long hpa = 0;
	long ret;

	if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
		return H_HARDWARE;

	if (dir == DMA_NONE)
		return H_SUCCESS;

	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
	if (ret != H_SUCCESS)
		iommu_tce_xchg(tbl, entry, &hpa, &dir);

	return ret;
}

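/*
 * Program one hardware TCE: translate the userspace address to a host
 * physical address via the preregistered memory list, take a "mapped"
 * reference, and install the translation, releasing whatever the entry
 * held before.
 */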
long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	long ret;
	unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
	struct mm_iommu_table_group_mem_t *mem;

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
	if (!mem)
		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
		return H_TOO_HARD;

	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
		return H_HARDWARE;

	if (mm_iommu_mapped_inc(mem))
		return H_CLOSED;

	ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
	if (WARN_ON_ONCE(ret)) {
		mm_iommu_mapped_dec(mem);
		return H_HARDWARE;
	}

	if (dir != DMA_NONE)
		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);

	*pua = ua;

	return 0;
}

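/*
 * H_PUT_TCE hcall: validates liobn/ioba/tce, updates every attached
 * hardware table and then the guest view of the TCE table.
 */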
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
		      unsigned long ioba, unsigned long tce)
{
	struct kvmppc_spapr_tce_table *stt;
	long ret, idx;
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long entry, ua = 0;
	enum dma_data_direction dir;

	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
	/* 	    liobn, ioba, tce); */

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, 1);
	if (ret != H_SUCCESS)
		return ret;

	ret = kvmppc_tce_validate(stt, tce);
	if (ret != H_SUCCESS)
		return ret;

	dir = iommu_tce_direction(tce);
	if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
			tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
		return H_PARAMETER;

	entry = ioba >> stt->page_shift;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		if (dir == DMA_NONE) {
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
					stit->tbl, entry);
		} else {
			idx = srcu_read_lock(&vcpu->kvm->srcu);
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
					entry, ua, dir);
			srcu_read_unlock(&vcpu->kvm->srcu, idx);
		}

		if (ret == H_SUCCESS)
			continue;

		if (ret == H_TOO_HARD)
			return ret;

		WARN_ON_ONCE(1);
		kvmppc_clear_tce(stit->tbl, entry);
	}

	kvmppc_tce_put(stt, entry, tce);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);

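/*
 * H_PUT_TCE_INDIRECT hcall: like H_PUT_TCE but takes a guest-physical
 * pointer to a list of up to 512 TCEs (512 * sizeof(u64) = 4096 bytes,
 * hence the 4K alignment check below).
 */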
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_list, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret = H_SUCCESS, idx;
	unsigned long entry, ua = 0;
	u64 __user *tces;
	u64 tce;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	entry = ioba >> stt->page_shift;
	/*
	 * SPAPR spec says that the maximum size of the list is 512 TCEs
	 * so the whole table fits in a 4K page
	 */
	if (npages > 512)
		return H_PARAMETER;

	if (tce_list & (SZ_4K - 1))
		return H_PARAMETER;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
		ret = H_TOO_HARD;
		goto unlock_exit;
	}
	tces = (u64 __user *) ua;

	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		ret = kvmppc_tce_validate(stt, tce);
		if (ret != H_SUCCESS)
			goto unlock_exit;

		if (kvmppc_gpa_to_ua(vcpu->kvm,
				tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
				&ua, NULL)) {
			ret = H_PARAMETER;
			goto unlock_exit;
		}

		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
			ret = kvmppc_tce_iommu_map(vcpu->kvm,
					stit->tbl, entry + i, ua,
					iommu_tce_direction(tce));

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				goto unlock_exit;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(stit->tbl, entry);
		}

		kvmppc_tce_put(stt, entry + i, tce);
	}

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);

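/*
 * H_STUFF_TCE hcall: writes the same TCE value (which must have no
 * permission bits set) into npages consecutive entries, clearing any
 * hardware mappings.
 */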
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_value, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	/* Check permission bits only to allow userspace to poison TCEs for debug */
	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
		return H_PARAMETER;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		unsigned long entry = ioba >> stit->tbl->it_page_shift;

		for (i = 0; i < npages; ++i) {
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
					stit->tbl, entry + i);

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				return ret;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(stit->tbl, entry);
		}
	}

	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
);