]>
Commit | Line | Data |
---|---|---|
1c5de193 JF |
1 | /****************************************************************************** |
2 | * privcmd.c | |
3 | * | |
4 | * Interface to privileged domain-0 commands. | |
5 | * | |
6 | * Copyright (c) 2002-2004, K A Fraser, B Dragovic | |
7 | */ | |
8 | ||
/* Prefix every pr_*() message from this file with "xen:privcmd: ". */
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
10 | ||
1c5de193 | 11 | #include <linux/kernel.h> |
d8414d3c | 12 | #include <linux/module.h> |
1c5de193 JF |
13 | #include <linux/sched.h> |
14 | #include <linux/slab.h> | |
15 | #include <linux/string.h> | |
16 | #include <linux/errno.h> | |
17 | #include <linux/mm.h> | |
18 | #include <linux/mman.h> | |
19 | #include <linux/uaccess.h> | |
20 | #include <linux/swap.h> | |
1c5de193 JF |
21 | #include <linux/highmem.h> |
22 | #include <linux/pagemap.h> | |
23 | #include <linux/seq_file.h> | |
d8414d3c | 24 | #include <linux/miscdevice.h> |
ab520be8 | 25 | #include <linux/moduleparam.h> |
1c5de193 JF |
26 | |
27 | #include <asm/pgalloc.h> | |
28 | #include <asm/pgtable.h> | |
29 | #include <asm/tlb.h> | |
30 | #include <asm/xen/hypervisor.h> | |
31 | #include <asm/xen/hypercall.h> | |
32 | ||
33 | #include <xen/xen.h> | |
34 | #include <xen/privcmd.h> | |
35 | #include <xen/interface/xen.h> | |
3ad08765 | 36 | #include <xen/interface/memory.h> |
ab520be8 | 37 | #include <xen/interface/hvm/dm_op.h> |
1c5de193 JF |
38 | #include <xen/features.h> |
39 | #include <xen/page.h> | |
de1ef206 | 40 | #include <xen/xen-ops.h> |
d71f5139 | 41 | #include <xen/balloon.h> |
f020e290 | 42 | |
d8414d3c BB |
43 | #include "privcmd.h" |
44 | ||
45 | MODULE_LICENSE("GPL"); | |
46 | ||
d71f5139 MR |
/*
 * Sentinel stored in vma->vm_private_data for non-auto-translated
 * mappings: marks the VMA as claimed by a privcmd mapping without
 * carrying a real pages array.
 */
#define PRIV_VMA_LOCKED ((void *)1)

/* Upper bound on the number of buffers accepted per DM_OP ioctl. */
static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
		 "Maximum number of buffers per dm_op hypercall");

/* Upper bound on the size of any single DM_OP buffer. */
static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
		   0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
		 "Maximum size of a dm_op hypercall buffer");

/*
 * Per-open-file state.  domid is DOMID_INVALID until the fd is
 * restricted via IOCTL_PRIVCMD_RESTRICT, after which all operations
 * must target exactly that domain.
 */
struct privcmd_data {
	domid_t domid;
};
63 | ||
a5deabe0 ALC |
64 | static int privcmd_vma_range_is_mapped( |
65 | struct vm_area_struct *vma, | |
66 | unsigned long addr, | |
67 | unsigned long nr_pages); | |
1c5de193 | 68 | |
/*
 * IOCTL_PRIVCMD_HYPERCALL: forward an arbitrary hypercall from
 * userspace to the hypervisor.
 *
 * Refused with -EPERM once the fd has been restricted to a domain,
 * since an arbitrary hypercall could affect any domain.
 * Returns the hypercall's own return value on success, -EFAULT if the
 * argument block cannot be copied in.
 */
static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_hypercall hypercall;
	long ret;

	/* Disallow arbitrary hypercalls if restricted */
	if (data->domid != DOMID_INVALID)
		return -EPERM;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	/* Allow preemption while the (possibly long-running) call executes. */
	xen_preemptible_hcall_begin();
	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);
	xen_preemptible_hcall_end();

	return ret;
}
91 | ||
92 | static void free_page_list(struct list_head *pages) | |
93 | { | |
94 | struct page *p, *n; | |
95 | ||
96 | list_for_each_entry_safe(p, n, pages, lru) | |
97 | __free_page(p); | |
98 | ||
99 | INIT_LIST_HEAD(pages); | |
100 | } | |
101 | ||
/*
 * Given an array of items in userspace, return a list of pages
 * containing the data. If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; its up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	/* An element larger than a page cannot be packed; treat as no-op. */
	if (size > PAGE_SIZE)
		return 0;

	/* Start "full" so the first iteration allocates a page. */
	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		/* Elements never straddle a page boundary. */
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			/* Added to the list before copying, so the caller
			 * can free a partial result on failure. */
			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
148 | ||
/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 *
 * @pos must be the list head produced by gather_array() with the same
 * @nelem/@size.  Stops at the first non-zero return from @fn and
 * propagates it; returns 0 if all elements were visited.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	/* Start "full" so the first iteration advances to the first page. */
	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}
184 | ||
/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 *
 * @fn receives a pointer to the first element in the page and the
 * number of elements in that page.  Stops at the first non-zero
 * return and propagates it.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
				struct list_head *pos,
				int (*fn)(void *data, int nr, void *state),
				void *state)
{
	void *pagedata;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	while (nelem) {
		int nr = (PAGE_SIZE/size);
		struct page *page;
		/* Last page may hold fewer than a full page of elements. */
		if (nr > nelem)
			nr = nelem;
		pos = pos->next;
		page = list_entry(pos, struct page, lru);
		pagedata = page_address(page);
		ret = (*fn)(pagedata, nr, state);
		if (ret)
			break;
		nelem -= nr;
	}

	return ret;
}
215 | ||
/* Iteration state threaded through mmap_gfn_range() by traverse_pages(). */
struct mmap_gfn_state {
	unsigned long va;		/* next expected virtual address */
	struct vm_area_struct *vma;	/* target VMA for the whole mapping */
	domid_t domain;			/* foreign domain owning the frames */
};
221 | ||
/*
 * traverse_pages() callback: map one privcmd_mmap_entry worth of
 * foreign frames into the VMA.  Chunks must be presented in ascending,
 * contiguous VA order; st->va tracks the expected start of the next one.
 */
static int mmap_gfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_gfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_gfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}
251 | ||
/*
 * IOCTL_PRIVCMD_MMAP: map a batch of foreign frame ranges (described by
 * privcmd_mmap_entry records) into a VMA previously created via mmap()
 * on this device.
 *
 * The first entry's va must coincide with vma->vm_start and the VMA
 * must not already be claimed.  Not supported on auto-translated
 * guests (use MMAPBATCH there instead).
 */
static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_gfn_state state;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
		return -EPERM;

	/* Pull the whole entry array into kernel pages. */
	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		/* VMA must start exactly at the first entry and be unused. */
		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
			goto out_up;
		vma->vm_private_data = PRIV_VMA_LOCKED;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_gfn_range, &state);


out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
312 | ||
/* Shared state for the two-pass MMAPBATCH implementation. */
struct mmap_batch_state {
	domid_t domain;			/* foreign domain owning the frames */
	unsigned long va;		/* next VA to map (first pass) */
	struct vm_area_struct *vma;	/* target VMA */
	int index;			/* next slot in the vma's pages array */
	/* A tristate:
	 * 0 for no errors
	 * 1 if at least one error has happened (and no
	 * -ENOENT errors have happened)
	 * -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	int version;			/* ioctl ABI version: 1 or 2 */

	/* User-space gfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_gfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};
332 | ||
/* auto translated dom0 note: if domU being created is PV, then gfn is
 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
 */
/*
 * traverse_pages_block() callback for the first MMAPBATCH pass: map one
 * page's worth of gfns.  Per-frame error codes are written back in
 * place over the gfn array (the (int *)gfnp cast), to be harvested by
 * mmap_return_errors() in the second pass.  Always returns 0 so the
 * traversal covers every frame; failures are recorded in
 * st->global_error instead.
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
	xen_pfn_t *gfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page **cur_pages = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_pages = &pages[st->index];

	BUG_ON(nr < 0);
	ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
					 (int *)gfnp, st->vma->vm_page_prot,
					 st->domain, cur_pages);

	/* Adjust the global_error? */
	if (ret != nr) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += XEN_PAGE_SIZE * nr;
	st->index += nr / XEN_PFN_PER_PAGE;

	return 0;
}
368 | ||
/*
 * Report one per-frame error back to userspace and advance the
 * corresponding user-array cursor.  V1 encodes the error into the
 * caller's gfn array; V2 writes the errno into the separate err array
 * (already zeroed, so only failures need a write).
 */
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	int ret;

	if (st->version == 1) {
		if (err) {
			xen_pfn_t gfn;

			ret = get_user(gfn, st->user_gfn);
			if (ret < 0)
				return ret;
			/*
			 * V1 encodes the error codes in the 32bit top
			 * nibble of the gfn (with its known
			 * limitations vis-a-vis 64 bit callers).
			 */
			gfn |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
			return __put_user(gfn, st->user_gfn++);
		} else
			st->user_gfn++;
	} else { /* st->version == 2 */
		if (err)
			return __put_user(err, st->user_err++);
		else
			st->user_err++;
	}

	return 0;
}
400 | ||
/*
 * traverse_pages_block() callback for the second MMAPBATCH pass: push
 * each recorded per-frame error back to userspace, stopping on the
 * first copy failure.
 */
static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *err_array = data;
	int idx, rc = 0;

	for (idx = 0; idx < nr && rc >= 0; idx++)
		rc = mmap_return_error(err_array[idx], st);

	return rc < 0 ? rc : 0;
}
415 | ||
/* Allocate pfns that are then mapped with gfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	/* Backed by ballooned-out pages so the frames are free for Xen. */
	rc = alloc_xenballooned_pages(numpgs, pages);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	/* The VMA must not already carry a pages array or be locked. */
	BUG_ON(vma->vm_private_data != NULL);
	vma->vm_private_data = pages;

	return 0;
}
441 | ||
7cbea8dc | 442 | static const struct vm_operations_struct privcmd_vm_ops; |
f31fdf51 | 443 | |
/*
 * IOCTL_PRIVCMD_MMAPBATCH(_V2): map an array of foreign gfns into a
 * privcmd VMA.
 *
 * Two passes over the gathered gfn array: mmap_batch_fn() maps frames
 * and records per-frame errors in place, then (only if anything
 * failed) mmap_return_errors() copies those errors back to userspace
 * in the version-specific format.  mmap_sem is dropped before the
 * second pass since it touches user memory.
 */
static long privcmd_ioctl_mmap_batch(
	struct file *file, void __user *udata, int version)
{
	struct privcmd_data *data = file->private_data;
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != m.dom)
		return -EPERM;

	/* m.num counts Xen-sized (4K) frames; convert to CPU pages. */
	nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	if (version == 2) {
		/* Zero error array now to only copy back actual errors. */
		if (clear_user(m.err, sizeof(int) * m.num)) {
			ret = -EFAULT;
			goto out;
		}
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Caller must either:
	 *
	 * Map the whole VMA range, which will also allocate all the
	 * pages required for the auto_translated_physmap case.
	 *
	 * Or
	 *
	 * Map unmapped holes left from a previous map attempt (e.g.,
	 * because those foreign frames were previously paged out).
	 */
	if (vma->vm_private_data == NULL) {
		if (m.addr != vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			ret = alloc_empty_pages(vma, nr_pages);
			if (ret < 0)
				goto out_unlock;
		} else
			vma->vm_private_data = PRIV_VMA_LOCKED;
	} else {
		if (m.addr < vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	state.domain        = m.dom;
	state.vma           = vma;
	state.va            = m.addr;
	state.index         = 0;
	state.global_error  = 0;
	state.version       = version;

	BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
				    &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (state.global_error) {
		/* Write back errors in second pass. */
		state.user_gfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
					   &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	free_page_list(&pagelist);
	return ret;

out_unlock:
	up_write(&mm->mmap_sem);
	goto out;
}
581 | ||
ab520be8 PD |
582 | static int lock_pages( |
583 | struct privcmd_dm_op_buf kbufs[], unsigned int num, | |
584 | struct page *pages[], unsigned int nr_pages) | |
585 | { | |
586 | unsigned int i; | |
587 | ||
588 | for (i = 0; i < num; i++) { | |
589 | unsigned int requested; | |
590 | int pinned; | |
591 | ||
592 | requested = DIV_ROUND_UP( | |
593 | offset_in_page(kbufs[i].uptr) + kbufs[i].size, | |
594 | PAGE_SIZE); | |
595 | if (requested > nr_pages) | |
596 | return -ENOSPC; | |
597 | ||
598 | pinned = get_user_pages_fast( | |
599 | (unsigned long) kbufs[i].uptr, | |
600 | requested, FOLL_WRITE, pages); | |
601 | if (pinned < 0) | |
602 | return pinned; | |
603 | ||
604 | nr_pages -= pinned; | |
605 | pages += pinned; | |
606 | } | |
607 | ||
608 | return 0; | |
609 | } | |
610 | ||
611 | static void unlock_pages(struct page *pages[], unsigned int nr_pages) | |
612 | { | |
613 | unsigned int i; | |
614 | ||
615 | if (!pages) | |
616 | return; | |
617 | ||
618 | for (i = 0; i < nr_pages; i++) { | |
619 | if (pages[i]) | |
620 | put_page(pages[i]); | |
621 | } | |
622 | } | |
623 | ||
/*
 * IOCTL_PRIVCMD_DM_OP: issue a device-model operation against a domain.
 *
 * Copies in the buffer descriptors, validates them against the module
 * limits (dm_op_max_nr_bufs, dm_op_buf_max_size), pins the user pages
 * backing every buffer, then performs the preemptible dm_op hypercall
 * with guest handles pointing at the (still user-addressed) buffers.
 */
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_dm_op kdata;
	struct privcmd_dm_op_buf *kbufs;
	unsigned int nr_pages = 0;
	struct page **pages = NULL;
	struct xen_dm_op_buf *xbufs = NULL;
	unsigned int i;
	long rc;

	if (copy_from_user(&kdata, udata, sizeof(kdata)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
		return -EPERM;

	if (kdata.num == 0)
		return 0;

	if (kdata.num > privcmd_dm_op_max_num)
		return -E2BIG;

	kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
	if (!kbufs)
		return -ENOMEM;

	if (copy_from_user(kbufs, kdata.ubufs,
			   sizeof(*kbufs) * kdata.num)) {
		rc = -EFAULT;
		goto out;
	}

	for (i = 0; i < kdata.num; i++) {
		if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
			rc = -E2BIG;
			goto out;
		}

		if (!access_ok(VERIFY_WRITE, kbufs[i].uptr,
			       kbufs[i].size)) {
			rc = -EFAULT;
			goto out;
		}

		/* Total pages needed to cover every buffer, including
		 * the partial first/last page of unaligned buffers. */
		nr_pages += DIV_ROUND_UP(
			offset_in_page(kbufs[i].uptr) + kbufs[i].size,
			PAGE_SIZE);
	}

	/* Zero-allocated so unlock_pages() can skip unpinned slots. */
	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		rc = -ENOMEM;
		goto out;
	}

	xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
	if (!xbufs) {
		rc = -ENOMEM;
		goto out;
	}

	rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
	if (rc)
		goto out;

	for (i = 0; i < kdata.num; i++) {
		set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
		xbufs[i].size = kbufs[i].size;
	}

	xen_preemptible_hcall_begin();
	rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
	xen_preemptible_hcall_end();

out:
	unlock_pages(pages, nr_pages);
	kfree(xbufs);
	kfree(pages);
	kfree(kbufs);

	return rc;
}
708 | ||
4610d240 PD |
709 | static long privcmd_ioctl_restrict(struct file *file, void __user *udata) |
710 | { | |
711 | struct privcmd_data *data = file->private_data; | |
712 | domid_t dom; | |
713 | ||
714 | if (copy_from_user(&dom, udata, sizeof(dom))) | |
715 | return -EFAULT; | |
716 | ||
717 | /* Set restriction to the specified domain, or check it matches */ | |
718 | if (data->domid == DOMID_INVALID) | |
719 | data->domid = dom; | |
720 | else if (data->domid != dom) | |
721 | return -EINVAL; | |
722 | ||
723 | return 0; | |
724 | } | |
725 | ||
/* Cursor state for remap_pfn_fn(): walks r->pages while installing PTEs. */
struct remap_pfn {
	struct mm_struct *mm;	/* address space receiving the PTEs */
	struct page **pages;	/* backing pages, one per PTE */
	pgprot_t prot;		/* protection bits for the new PTEs */
	unsigned long i;	/* index of the next page to install */
};
732 | ||
/*
 * apply_to_page_range() callback: install the next page from the
 * remap_pfn cursor as a special PTE at @addr.  Always succeeds.
 */
static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
			void *data)
{
	struct remap_pfn *r = data;
	struct page *page = r->pages[r->i];
	pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot));

	set_pte_at(r->mm, addr, ptep, pte);
	r->i++;

	return 0;
}
745 | ||
/*
 * IOCTL_PRIVCMD_MMAP_RESOURCE: map a Xen resource (e.g. a domain's
 * ioreq server or grant-table frames) into a privcmd VMA via
 * XENMEM_acquire_resource.
 *
 * On auto-translated guests the frames land in ballooned pages which
 * are then wired into the VMA with remap_pfn_fn(); otherwise the
 * returned mfns are mapped directly.  The frame_list array doubles as
 * an error array on partial mapping (same in-place convention as
 * mmap_batch_fn).
 */
static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct privcmd_mmap_resource kdata;
	xen_pfn_t *pfns = NULL;
	struct xen_mem_acquire_resource xdata;
	int rc;

	if (copy_from_user(&kdata, udata, sizeof(kdata)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
		return -EPERM;

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, kdata.addr);
	if (!vma || vma->vm_ops != &privcmd_vm_ops) {
		rc = -EINVAL;
		goto out;
	}

	pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL);
	if (!pfns) {
		rc = -ENOMEM;
		goto out;
	}

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
		struct page **pages;
		unsigned int i;

		rc = alloc_empty_pages(vma, nr);
		if (rc < 0)
			goto out;

		/* Hand the ballooned frames to Xen as the mapping target. */
		pages = vma->vm_private_data;
		for (i = 0; i < kdata.num; i++) {
			xen_pfn_t pfn =
				page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);

			pfns[i] = pfn + (i % XEN_PFN_PER_PAGE);
		}
	} else
		vma->vm_private_data = PRIV_VMA_LOCKED;

	memset(&xdata, 0, sizeof(xdata));
	xdata.domid = kdata.dom;
	xdata.type = kdata.type;
	xdata.id = kdata.id;
	xdata.frame = kdata.idx;
	xdata.nr_frames = kdata.num;
	set_xen_guest_handle(xdata.frame_list, pfns);

	xen_preemptible_hcall_begin();
	rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
	xen_preemptible_hcall_end();

	if (rc)
		goto out;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		/* Wire the now-populated ballooned pages into the VMA. */
		struct remap_pfn r = {
			.mm = vma->vm_mm,
			.pages = vma->vm_private_data,
			.prot = vma->vm_page_prot,
		};

		rc = apply_to_page_range(r.mm, kdata.addr,
					 kdata.num << PAGE_SHIFT,
					 remap_pfn_fn, &r);
	} else {
		/* Caller-owned resources are mapped as our own frames. */
		unsigned int domid =
			(xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
			DOMID_SELF : kdata.dom;
		int num;

		num = xen_remap_domain_mfn_array(vma,
						 kdata.addr & PAGE_MASK,
						 pfns, kdata.num, (int *)pfns,
						 vma->vm_page_prot,
						 domid,
						 vma->vm_private_data);
		if (num < 0)
			rc = num;
		else if (num != kdata.num) {
			unsigned int i;

			/* Partial mapping: report the first frame error. */
			for (i = 0; i < num; i++) {
				rc = pfns[i];
				if (rc < 0)
					break;
			}
		} else
			rc = 0;
	}

out:
	up_write(&mm->mmap_sem);
	kfree(pfns);

	return rc;
}
853 | ||
/*
 * Top-level ioctl dispatcher for /dev/xen/privcmd.  Unknown commands
 * return -ENOTTY per convention.
 */
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOTTY;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(file, udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(file, udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(file, udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(file, udata, 2);
		break;

	case IOCTL_PRIVCMD_DM_OP:
		ret = privcmd_ioctl_dm_op(file, udata);
		break;

	case IOCTL_PRIVCMD_RESTRICT:
		ret = privcmd_ioctl_restrict(file, udata);
		break;

	case IOCTL_PRIVCMD_MMAP_RESOURCE:
		ret = privcmd_ioctl_mmap_resource(file, udata);
		break;

	default:
		break;
	}

	return ret;
}
895 | ||
4610d240 PD |
896 | static int privcmd_open(struct inode *ino, struct file *file) |
897 | { | |
898 | struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL); | |
899 | ||
900 | if (!data) | |
901 | return -ENOMEM; | |
902 | ||
903 | /* DOMID_INVALID implies no restriction */ | |
904 | data->domid = DOMID_INVALID; | |
905 | ||
906 | file->private_data = data; | |
907 | return 0; | |
908 | } | |
909 | ||
910 | static int privcmd_release(struct inode *ino, struct file *file) | |
911 | { | |
912 | struct privcmd_data *data = file->private_data; | |
913 | ||
914 | kfree(data); | |
915 | return 0; | |
916 | } | |
917 | ||
/*
 * VMA close hook: on auto-translated guests, unmap the foreign frames
 * and return the ballooned pages allocated by alloc_empty_pages().
 * For PV guests vm_private_data is PRIV_VMA_LOCKED (or NULL), and the
 * feature check makes this a no-op.
 */
static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = vma_pages(vma);
	int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
	int rc;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
	if (rc == 0)
		free_xenballooned_pages(numpgs, pages);
	else
		/* Unmap failed: deliberately leak rather than free pages
		 * that may still be mapped by Xen. */
		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
			numpgs, rc);
	kfree(pages);
}
936 | ||
4bf2cc96 | 937 | static vm_fault_t privcmd_fault(struct vm_fault *vmf) |
1c5de193 | 938 | { |
441c7416 | 939 | printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", |
11bac800 | 940 | vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, |
1a29d85e | 941 | vmf->pgoff, (void *)vmf->address); |
441c7416 | 942 | |
1c5de193 JF |
943 | return VM_FAULT_SIGBUS; |
944 | } | |
945 | ||
/* VMA operations for privcmd mappings; identity-checked by the ioctls. */
static const struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};
950 | ||
/*
 * mmap() hook: reserve a VMA that the PRIVCMD_MMAP* ioctls will later
 * populate.  No frames are mapped here.
 */
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}
962 | ||
a5deabe0 ALC |
963 | /* |
964 | * For MMAPBATCH*. This allows asserting the singleshot mapping | |
965 | * on a per pfn/pte basis. Mapping calls that fail with ENOENT | |
966 | * can be then retried until success. | |
967 | */ | |
968 | static int is_mapped_fn(pte_t *pte, struct page *pmd_page, | |
969 | unsigned long addr, void *data) | |
970 | { | |
971 | return pte_none(*pte) ? 0 : -EBUSY; | |
972 | } | |
973 | ||
974 | static int privcmd_vma_range_is_mapped( | |
975 | struct vm_area_struct *vma, | |
976 | unsigned long addr, | |
977 | unsigned long nr_pages) | |
1c5de193 | 978 | { |
a5deabe0 ALC |
979 | return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, |
980 | is_mapped_fn, NULL) != 0; | |
1c5de193 | 981 | } |
1c5de193 | 982 | |
/* File operations for /dev/xen/privcmd; exported for the xenfs mount. */
const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.open = privcmd_open,
	.release = privcmd_release,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);
991 | ||
/* Misc character device exposing the privcmd interface at /dev/xen/privcmd. */
static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};
997 | ||
998 | static int __init privcmd_init(void) | |
999 | { | |
1000 | int err; | |
1001 | ||
1002 | if (!xen_domain()) | |
1003 | return -ENODEV; | |
1004 | ||
1005 | err = misc_register(&privcmd_dev); | |
1006 | if (err != 0) { | |
283c0972 | 1007 | pr_err("Could not register Xen privcmd device\n"); |
d8414d3c BB |
1008 | return err; |
1009 | } | |
1010 | return 0; | |
1011 | } | |
1012 | ||
/* Unregister the misc device on module unload. */
static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);