]>
Commit | Line | Data |
---|---|---|
f31e65e1 BH |
1 | /* |
2 | * This program is free software; you can redistribute it and/or modify | |
3 | * it under the terms of the GNU General Public License, version 2, as | |
4 | * published by the Free Software Foundation. | |
5 | * | |
6 | * This program is distributed in the hope that it will be useful, | |
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
9 | * GNU General Public License for more details. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * along with this program; if not, write to the Free Software | |
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
14 | * | |
15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | |
16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> | |
d3695aa4 | 17 | * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> |
f31e65e1 BH |
18 | */ |
19 | ||
20 | #include <linux/types.h> | |
21 | #include <linux/string.h> | |
22 | #include <linux/kvm.h> | |
23 | #include <linux/kvm_host.h> | |
24 | #include <linux/highmem.h> | |
25 | #include <linux/gfp.h> | |
26 | #include <linux/slab.h> | |
3f07c014 | 27 | #include <linux/sched/signal.h> |
f31e65e1 BH |
28 | #include <linux/hugetlb.h> |
29 | #include <linux/list.h> | |
30 | #include <linux/anon_inodes.h> | |
31 | ||
32 | #include <asm/tlbflush.h> | |
33 | #include <asm/kvm_ppc.h> | |
34 | #include <asm/kvm_book3s.h> | |
f64e8084 | 35 | #include <asm/book3s/64/mmu-hash.h> |
f31e65e1 BH |
36 | #include <asm/hvcall.h> |
37 | #include <asm/synch.h> | |
38 | #include <asm/ppc-opcode.h> | |
39 | #include <asm/kvm_host.h> | |
40 | #include <asm/udbg.h> | |
462ee11e | 41 | #include <asm/iommu.h> |
d3695aa4 | 42 | #include <asm/tce.h> |
f31e65e1 | 43 | |
fe26e527 | 44 | static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) |
f31e65e1 | 45 | { |
fe26e527 | 46 | return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; |
f31e65e1 BH |
47 | } |
48 | ||
f8626985 AK |
49 | static unsigned long kvmppc_stt_pages(unsigned long tce_pages) |
50 | { | |
51 | unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + | |
52 | (tce_pages * sizeof(struct page *)); | |
53 | ||
54 | return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; | |
55 | } | |
56 | ||
57 | static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) | |
58 | { | |
59 | long ret = 0; | |
60 | ||
61 | if (!current || !current->mm) | |
62 | return ret; /* process exited */ | |
63 | ||
64 | down_write(¤t->mm->mmap_sem); | |
65 | ||
66 | if (inc) { | |
67 | unsigned long locked, lock_limit; | |
68 | ||
69 | locked = current->mm->locked_vm + stt_pages; | |
70 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | |
71 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | |
72 | ret = -ENOMEM; | |
73 | else | |
74 | current->mm->locked_vm += stt_pages; | |
75 | } else { | |
76 | if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) | |
77 | stt_pages = current->mm->locked_vm; | |
78 | ||
79 | current->mm->locked_vm -= stt_pages; | |
80 | } | |
81 | ||
82 | pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, | |
83 | inc ? '+' : '-', | |
84 | stt_pages << PAGE_SHIFT, | |
85 | current->mm->locked_vm << PAGE_SHIFT, | |
86 | rlimit(RLIMIT_MEMLOCK), | |
87 | ret ? " - exceeded" : ""); | |
88 | ||
89 | up_write(¤t->mm->mmap_sem); | |
90 | ||
91 | return ret; | |
92 | } | |
93 | ||
366baf28 | 94 | static void release_spapr_tce_table(struct rcu_head *head) |
f31e65e1 | 95 | { |
366baf28 AK |
96 | struct kvmppc_spapr_tce_table *stt = container_of(head, |
97 | struct kvmppc_spapr_tce_table, rcu); | |
fe26e527 | 98 | unsigned long i, npages = kvmppc_tce_pages(stt->size); |
f31e65e1 | 99 | |
f8626985 | 100 | for (i = 0; i < npages; i++) |
f31e65e1 | 101 | __free_page(stt->pages[i]); |
f31e65e1 | 102 | |
366baf28 | 103 | kfree(stt); |
f31e65e1 BH |
104 | } |
105 | ||
11bac800 | 106 | static int kvm_spapr_tce_fault(struct vm_fault *vmf) |
f31e65e1 | 107 | { |
11bac800 | 108 | struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; |
f31e65e1 BH |
109 | struct page *page; |
110 | ||
fe26e527 | 111 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) |
f31e65e1 BH |
112 | return VM_FAULT_SIGBUS; |
113 | ||
114 | page = stt->pages[vmf->pgoff]; | |
115 | get_page(page); | |
116 | vmf->page = page; | |
117 | return 0; | |
118 | } | |
119 | ||
/* VMA operations for TCE table mappings: only page faults are handled. */
static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};
123 | ||
/*
 * mmap() handler for the TCE table file: installs kvm_spapr_tce_vm_ops on
 * the new VMA so pages are supplied by kvm_spapr_tce_fault().  Always
 * returns 0.
 */
static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}
129 | ||
130 | static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | |
131 | { | |
132 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | |
133 | ||
366baf28 AK |
134 | list_del_rcu(&stt->list); |
135 | ||
136 | kvm_put_kvm(stt->kvm); | |
137 | ||
f8626985 | 138 | kvmppc_account_memlimit( |
fe26e527 | 139 | kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); |
366baf28 AK |
140 | call_rcu(&stt->rcu, release_spapr_tce_table); |
141 | ||
f31e65e1 BH |
142 | return 0; |
143 | } | |
144 | ||
/*
 * File operations of the anonymous inode created for each TCE table:
 * the table can be mmap()ed and is torn down when the file is released.
 */
static const struct file_operations kvm_spapr_tce_fops = {
	.mmap = kvm_spapr_tce_mmap,
	.release = kvm_spapr_tce_release,
};
149 | ||
150 | long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |
58ded420 | 151 | struct kvm_create_spapr_tce_64 *args) |
f31e65e1 BH |
152 | { |
153 | struct kvmppc_spapr_tce_table *stt = NULL; | |
fe26e527 | 154 | unsigned long npages, size; |
f31e65e1 BH |
155 | int ret = -ENOMEM; |
156 | int i; | |
157 | ||
58ded420 AK |
158 | if (!args->size) |
159 | return -EINVAL; | |
160 | ||
f31e65e1 BH |
161 | /* Check this LIOBN hasn't been previously allocated */ |
162 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | |
163 | if (stt->liobn == args->liobn) | |
164 | return -EBUSY; | |
165 | } | |
166 | ||
58ded420 | 167 | size = args->size; |
fe26e527 | 168 | npages = kvmppc_tce_pages(size); |
f8626985 AK |
169 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); |
170 | if (ret) { | |
171 | stt = NULL; | |
172 | goto fail; | |
173 | } | |
f31e65e1 | 174 | |
5982f084 | 175 | ret = -ENOMEM; |
f31e65e1 BH |
176 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), |
177 | GFP_KERNEL); | |
178 | if (!stt) | |
179 | goto fail; | |
180 | ||
181 | stt->liobn = args->liobn; | |
58ded420 AK |
182 | stt->page_shift = args->page_shift; |
183 | stt->offset = args->offset; | |
fe26e527 | 184 | stt->size = size; |
f31e65e1 BH |
185 | stt->kvm = kvm; |
186 | ||
187 | for (i = 0; i < npages; i++) { | |
188 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | |
189 | if (!stt->pages[i]) | |
190 | goto fail; | |
191 | } | |
192 | ||
193 | kvm_get_kvm(kvm); | |
194 | ||
195 | mutex_lock(&kvm->lock); | |
366baf28 | 196 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
f31e65e1 BH |
197 | |
198 | mutex_unlock(&kvm->lock); | |
199 | ||
200 | return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | |
2f84d5ea | 201 | stt, O_RDWR | O_CLOEXEC); |
f31e65e1 BH |
202 | |
203 | fail: | |
204 | if (stt) { | |
205 | for (i = 0; i < npages; i++) | |
206 | if (stt->pages[i]) | |
207 | __free_page(stt->pages[i]); | |
208 | ||
209 | kfree(stt); | |
210 | } | |
211 | return ret; | |
212 | } | |
d3695aa4 | 213 | |
31217db7 AK |
214 | long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
215 | unsigned long ioba, unsigned long tce) | |
216 | { | |
217 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | |
218 | long ret; | |
219 | ||
220 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ | |
221 | /* liobn, ioba, tce); */ | |
222 | ||
223 | if (!stt) | |
224 | return H_TOO_HARD; | |
225 | ||
226 | ret = kvmppc_ioba_validate(stt, ioba, 1); | |
227 | if (ret != H_SUCCESS) | |
228 | return ret; | |
229 | ||
230 | ret = kvmppc_tce_validate(stt, tce); | |
231 | if (ret != H_SUCCESS) | |
232 | return ret; | |
233 | ||
234 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); | |
235 | ||
236 | return H_SUCCESS; | |
237 | } | |
238 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); | |
239 | ||
d3695aa4 AK |
240 | long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, |
241 | unsigned long liobn, unsigned long ioba, | |
242 | unsigned long tce_list, unsigned long npages) | |
243 | { | |
244 | struct kvmppc_spapr_tce_table *stt; | |
245 | long i, ret = H_SUCCESS, idx; | |
246 | unsigned long entry, ua = 0; | |
f8750513 DA |
247 | u64 __user *tces; |
248 | u64 tce; | |
d3695aa4 AK |
249 | |
250 | stt = kvmppc_find_table(vcpu, liobn); | |
251 | if (!stt) | |
252 | return H_TOO_HARD; | |
253 | ||
fe26e527 | 254 | entry = ioba >> stt->page_shift; |
d3695aa4 AK |
255 | /* |
256 | * SPAPR spec says that the maximum size of the list is 512 TCEs | |
257 | * so the whole table fits in 4K page | |
258 | */ | |
259 | if (npages > 512) | |
260 | return H_PARAMETER; | |
261 | ||
262 | if (tce_list & (SZ_4K - 1)) | |
263 | return H_PARAMETER; | |
264 | ||
265 | ret = kvmppc_ioba_validate(stt, ioba, npages); | |
266 | if (ret != H_SUCCESS) | |
267 | return ret; | |
268 | ||
269 | idx = srcu_read_lock(&vcpu->kvm->srcu); | |
270 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { | |
271 | ret = H_TOO_HARD; | |
272 | goto unlock_exit; | |
273 | } | |
274 | tces = (u64 __user *) ua; | |
275 | ||
276 | for (i = 0; i < npages; ++i) { | |
277 | if (get_user(tce, tces + i)) { | |
278 | ret = H_TOO_HARD; | |
279 | goto unlock_exit; | |
280 | } | |
281 | tce = be64_to_cpu(tce); | |
282 | ||
283 | ret = kvmppc_tce_validate(stt, tce); | |
284 | if (ret != H_SUCCESS) | |
285 | goto unlock_exit; | |
286 | ||
287 | kvmppc_tce_put(stt, entry + i, tce); | |
288 | } | |
289 | ||
290 | unlock_exit: | |
291 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | |
292 | ||
293 | return ret; | |
294 | } | |
295 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); | |
31217db7 AK |
296 | |
297 | long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | |
298 | unsigned long liobn, unsigned long ioba, | |
299 | unsigned long tce_value, unsigned long npages) | |
300 | { | |
301 | struct kvmppc_spapr_tce_table *stt; | |
302 | long i, ret; | |
303 | ||
304 | stt = kvmppc_find_table(vcpu, liobn); | |
305 | if (!stt) | |
306 | return H_TOO_HARD; | |
307 | ||
308 | ret = kvmppc_ioba_validate(stt, ioba, npages); | |
309 | if (ret != H_SUCCESS) | |
310 | return ret; | |
311 | ||
312 | /* Check permission bits only to allow userspace poison TCE for debug */ | |
313 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) | |
314 | return H_PARAMETER; | |
315 | ||
316 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | |
317 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | |
318 | ||
319 | return H_SUCCESS; | |
320 | } | |
321 | EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); |