]>
Commit | Line | Data |
---|---|---|
043405e1 CO |
1 | /* |
2 | * Kernel-based Virtual Machine driver for Linux | |
3 | * | |
4 | * derived from drivers/kvm/kvm_main.c | |
5 | * | |
6 | * Copyright (C) 2006 Qumranet, Inc. | |
7 | * | |
8 | * Authors: | |
9 | * Avi Kivity <avi@qumranet.com> | |
10 | * Yaniv Kamay <yaniv@qumranet.com> | |
11 | * | |
12 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
13 | * the COPYING file in the top-level directory. | |
14 | * | |
15 | */ | |
16 | ||
313a3dc7 | 17 | #include "kvm.h" |
043405e1 | 18 | #include "x86.h" |
313a3dc7 CO |
19 | #include "irq.h" |
20 | ||
21 | #include <linux/kvm.h> | |
22 | #include <linux/fs.h> | |
23 | #include <linux/vmalloc.h> | |
043405e1 CO |
24 | |
25 | #include <asm/uaccess.h> | |
26 | ||
313a3dc7 CO |
/*
 * msr_io() rejects any request whose nmsrs is greater than or equal to
 * this value with -E2BIG.
 */
#define MAX_IO_MSRS 256
28 | ||
043405e1 CO |
29 | /* |
30 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | |
31 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | |
32 | * | |
33 | * This list is modified at module load time to reflect the | |
34 | * capabilities of the host cpu. | |
35 | */ | |
36 | static u32 msrs_to_save[] = { | |
37 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | |
38 | MSR_K6_STAR, | |
39 | #ifdef CONFIG_X86_64 | |
40 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | |
41 | #endif | |
42 | MSR_IA32_TIME_STAMP_COUNTER, | |
43 | }; | |
44 | ||
/* Number of valid leading entries in msrs_to_save[]; set by kvm_init_msr_list(). */
static unsigned int num_msrs_to_save;
46 | ||
/*
 * MSR numbers reported by KVM_GET_MSR_INDEX_LIST in addition to
 * msrs_to_save[]; they are appended after the saved MSRs in the list
 * returned to userspace.
 */
static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};
50 | ||
313a3dc7 CO |
51 | /* |
52 | * Adapt set_msr() to msr_io()'s calling convention | |
53 | */ | |
54 | static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |
55 | { | |
56 | return kvm_set_msr(vcpu, index, *data); | |
57 | } | |
58 | ||
59 | /* | |
60 | * Read or write a bunch of msrs. All parameters are kernel addresses. | |
61 | * | |
62 | * @return number of msrs set successfully. | |
63 | */ | |
64 | static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |
65 | struct kvm_msr_entry *entries, | |
66 | int (*do_msr)(struct kvm_vcpu *vcpu, | |
67 | unsigned index, u64 *data)) | |
68 | { | |
69 | int i; | |
70 | ||
71 | vcpu_load(vcpu); | |
72 | ||
73 | for (i = 0; i < msrs->nmsrs; ++i) | |
74 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | |
75 | break; | |
76 | ||
77 | vcpu_put(vcpu); | |
78 | ||
79 | return i; | |
80 | } | |
81 | ||
82 | /* | |
83 | * Read or write a bunch of msrs. Parameters are user addresses. | |
84 | * | |
85 | * @return number of msrs set successfully. | |
86 | */ | |
87 | static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |
88 | int (*do_msr)(struct kvm_vcpu *vcpu, | |
89 | unsigned index, u64 *data), | |
90 | int writeback) | |
91 | { | |
92 | struct kvm_msrs msrs; | |
93 | struct kvm_msr_entry *entries; | |
94 | int r, n; | |
95 | unsigned size; | |
96 | ||
97 | r = -EFAULT; | |
98 | if (copy_from_user(&msrs, user_msrs, sizeof msrs)) | |
99 | goto out; | |
100 | ||
101 | r = -E2BIG; | |
102 | if (msrs.nmsrs >= MAX_IO_MSRS) | |
103 | goto out; | |
104 | ||
105 | r = -ENOMEM; | |
106 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; | |
107 | entries = vmalloc(size); | |
108 | if (!entries) | |
109 | goto out; | |
110 | ||
111 | r = -EFAULT; | |
112 | if (copy_from_user(entries, user_msrs->entries, size)) | |
113 | goto out_free; | |
114 | ||
115 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); | |
116 | if (r < 0) | |
117 | goto out_free; | |
118 | ||
119 | r = -EFAULT; | |
120 | if (writeback && copy_to_user(user_msrs->entries, entries, size)) | |
121 | goto out_free; | |
122 | ||
123 | r = n; | |
124 | ||
125 | out_free: | |
126 | vfree(entries); | |
127 | out: | |
128 | return r; | |
129 | } | |
130 | ||
043405e1 CO |
131 | long kvm_arch_dev_ioctl(struct file *filp, |
132 | unsigned int ioctl, unsigned long arg) | |
133 | { | |
134 | void __user *argp = (void __user *)arg; | |
135 | long r; | |
136 | ||
137 | switch (ioctl) { | |
138 | case KVM_GET_MSR_INDEX_LIST: { | |
139 | struct kvm_msr_list __user *user_msr_list = argp; | |
140 | struct kvm_msr_list msr_list; | |
141 | unsigned n; | |
142 | ||
143 | r = -EFAULT; | |
144 | if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) | |
145 | goto out; | |
146 | n = msr_list.nmsrs; | |
147 | msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); | |
148 | if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) | |
149 | goto out; | |
150 | r = -E2BIG; | |
151 | if (n < num_msrs_to_save) | |
152 | goto out; | |
153 | r = -EFAULT; | |
154 | if (copy_to_user(user_msr_list->indices, &msrs_to_save, | |
155 | num_msrs_to_save * sizeof(u32))) | |
156 | goto out; | |
157 | if (copy_to_user(user_msr_list->indices | |
158 | + num_msrs_to_save * sizeof(u32), | |
159 | &emulated_msrs, | |
160 | ARRAY_SIZE(emulated_msrs) * sizeof(u32))) | |
161 | goto out; | |
162 | r = 0; | |
163 | break; | |
164 | } | |
165 | default: | |
166 | r = -EINVAL; | |
167 | } | |
168 | out: | |
169 | return r; | |
170 | } | |
171 | ||
313a3dc7 CO |
172 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
173 | { | |
174 | kvm_x86_ops->vcpu_load(vcpu, cpu); | |
175 | } | |
176 | ||
177 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |
178 | { | |
179 | kvm_x86_ops->vcpu_put(vcpu); | |
180 | } | |
181 | ||
182 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | |
183 | { | |
184 | u64 efer; | |
185 | int i; | |
186 | struct kvm_cpuid_entry *e, *entry; | |
187 | ||
188 | rdmsrl(MSR_EFER, efer); | |
189 | entry = NULL; | |
190 | for (i = 0; i < vcpu->cpuid_nent; ++i) { | |
191 | e = &vcpu->cpuid_entries[i]; | |
192 | if (e->function == 0x80000001) { | |
193 | entry = e; | |
194 | break; | |
195 | } | |
196 | } | |
197 | if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { | |
198 | entry->edx &= ~(1 << 20); | |
199 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | |
200 | } | |
201 | } | |
202 | ||
203 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |
204 | struct kvm_cpuid *cpuid, | |
205 | struct kvm_cpuid_entry __user *entries) | |
206 | { | |
207 | int r; | |
208 | ||
209 | r = -E2BIG; | |
210 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | |
211 | goto out; | |
212 | r = -EFAULT; | |
213 | if (copy_from_user(&vcpu->cpuid_entries, entries, | |
214 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | |
215 | goto out; | |
216 | vcpu->cpuid_nent = cpuid->nent; | |
217 | cpuid_fix_nx_cap(vcpu); | |
218 | return 0; | |
219 | ||
220 | out: | |
221 | return r; | |
222 | } | |
223 | ||
224 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | |
225 | struct kvm_lapic_state *s) | |
226 | { | |
227 | vcpu_load(vcpu); | |
228 | memcpy(s->regs, vcpu->apic->regs, sizeof *s); | |
229 | vcpu_put(vcpu); | |
230 | ||
231 | return 0; | |
232 | } | |
233 | ||
234 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |
235 | struct kvm_lapic_state *s) | |
236 | { | |
237 | vcpu_load(vcpu); | |
238 | memcpy(vcpu->apic->regs, s->regs, sizeof *s); | |
239 | kvm_apic_post_state_restore(vcpu); | |
240 | vcpu_put(vcpu); | |
241 | ||
242 | return 0; | |
243 | } | |
244 | ||
245 | long kvm_arch_vcpu_ioctl(struct file *filp, | |
246 | unsigned int ioctl, unsigned long arg) | |
247 | { | |
248 | struct kvm_vcpu *vcpu = filp->private_data; | |
249 | void __user *argp = (void __user *)arg; | |
250 | int r; | |
251 | ||
252 | switch (ioctl) { | |
253 | case KVM_GET_LAPIC: { | |
254 | struct kvm_lapic_state lapic; | |
255 | ||
256 | memset(&lapic, 0, sizeof lapic); | |
257 | r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); | |
258 | if (r) | |
259 | goto out; | |
260 | r = -EFAULT; | |
261 | if (copy_to_user(argp, &lapic, sizeof lapic)) | |
262 | goto out; | |
263 | r = 0; | |
264 | break; | |
265 | } | |
266 | case KVM_SET_LAPIC: { | |
267 | struct kvm_lapic_state lapic; | |
268 | ||
269 | r = -EFAULT; | |
270 | if (copy_from_user(&lapic, argp, sizeof lapic)) | |
271 | goto out; | |
272 | r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; | |
273 | if (r) | |
274 | goto out; | |
275 | r = 0; | |
276 | break; | |
277 | } | |
278 | case KVM_SET_CPUID: { | |
279 | struct kvm_cpuid __user *cpuid_arg = argp; | |
280 | struct kvm_cpuid cpuid; | |
281 | ||
282 | r = -EFAULT; | |
283 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | |
284 | goto out; | |
285 | r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); | |
286 | if (r) | |
287 | goto out; | |
288 | break; | |
289 | } | |
290 | case KVM_GET_MSRS: | |
291 | r = msr_io(vcpu, argp, kvm_get_msr, 1); | |
292 | break; | |
293 | case KVM_SET_MSRS: | |
294 | r = msr_io(vcpu, argp, do_set_msr, 0); | |
295 | break; | |
296 | default: | |
297 | r = -EINVAL; | |
298 | } | |
299 | out: | |
300 | return r; | |
301 | } | |
302 | ||
1fe779f8 CO |
303 | static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) |
304 | { | |
305 | int ret; | |
306 | ||
307 | if (addr > (unsigned int)(-3 * PAGE_SIZE)) | |
308 | return -1; | |
309 | ret = kvm_x86_ops->set_tss_addr(kvm, addr); | |
310 | return ret; | |
311 | } | |
312 | ||
313 | static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |
314 | u32 kvm_nr_mmu_pages) | |
315 | { | |
316 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | |
317 | return -EINVAL; | |
318 | ||
319 | mutex_lock(&kvm->lock); | |
320 | ||
321 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | |
322 | kvm->n_requested_mmu_pages = kvm_nr_mmu_pages; | |
323 | ||
324 | mutex_unlock(&kvm->lock); | |
325 | return 0; | |
326 | } | |
327 | ||
328 | static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |
329 | { | |
330 | return kvm->n_alloc_mmu_pages; | |
331 | } | |
332 | ||
333 | /* | |
334 | * Set a new alias region. Aliases map a portion of physical memory into | |
335 | * another portion. This is useful for memory windows, for example the PC | |
336 | * VGA region. | |
337 | */ | |
338 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |
339 | struct kvm_memory_alias *alias) | |
340 | { | |
341 | int r, n; | |
342 | struct kvm_mem_alias *p; | |
343 | ||
344 | r = -EINVAL; | |
345 | /* General sanity checks */ | |
346 | if (alias->memory_size & (PAGE_SIZE - 1)) | |
347 | goto out; | |
348 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | |
349 | goto out; | |
350 | if (alias->slot >= KVM_ALIAS_SLOTS) | |
351 | goto out; | |
352 | if (alias->guest_phys_addr + alias->memory_size | |
353 | < alias->guest_phys_addr) | |
354 | goto out; | |
355 | if (alias->target_phys_addr + alias->memory_size | |
356 | < alias->target_phys_addr) | |
357 | goto out; | |
358 | ||
359 | mutex_lock(&kvm->lock); | |
360 | ||
361 | p = &kvm->aliases[alias->slot]; | |
362 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | |
363 | p->npages = alias->memory_size >> PAGE_SHIFT; | |
364 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | |
365 | ||
366 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | |
367 | if (kvm->aliases[n - 1].npages) | |
368 | break; | |
369 | kvm->naliases = n; | |
370 | ||
371 | kvm_mmu_zap_all(kvm); | |
372 | ||
373 | mutex_unlock(&kvm->lock); | |
374 | ||
375 | return 0; | |
376 | ||
377 | out: | |
378 | return r; | |
379 | } | |
380 | ||
381 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |
382 | { | |
383 | int r; | |
384 | ||
385 | r = 0; | |
386 | switch (chip->chip_id) { | |
387 | case KVM_IRQCHIP_PIC_MASTER: | |
388 | memcpy(&chip->chip.pic, | |
389 | &pic_irqchip(kvm)->pics[0], | |
390 | sizeof(struct kvm_pic_state)); | |
391 | break; | |
392 | case KVM_IRQCHIP_PIC_SLAVE: | |
393 | memcpy(&chip->chip.pic, | |
394 | &pic_irqchip(kvm)->pics[1], | |
395 | sizeof(struct kvm_pic_state)); | |
396 | break; | |
397 | case KVM_IRQCHIP_IOAPIC: | |
398 | memcpy(&chip->chip.ioapic, | |
399 | ioapic_irqchip(kvm), | |
400 | sizeof(struct kvm_ioapic_state)); | |
401 | break; | |
402 | default: | |
403 | r = -EINVAL; | |
404 | break; | |
405 | } | |
406 | return r; | |
407 | } | |
408 | ||
409 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |
410 | { | |
411 | int r; | |
412 | ||
413 | r = 0; | |
414 | switch (chip->chip_id) { | |
415 | case KVM_IRQCHIP_PIC_MASTER: | |
416 | memcpy(&pic_irqchip(kvm)->pics[0], | |
417 | &chip->chip.pic, | |
418 | sizeof(struct kvm_pic_state)); | |
419 | break; | |
420 | case KVM_IRQCHIP_PIC_SLAVE: | |
421 | memcpy(&pic_irqchip(kvm)->pics[1], | |
422 | &chip->chip.pic, | |
423 | sizeof(struct kvm_pic_state)); | |
424 | break; | |
425 | case KVM_IRQCHIP_IOAPIC: | |
426 | memcpy(ioapic_irqchip(kvm), | |
427 | &chip->chip.ioapic, | |
428 | sizeof(struct kvm_ioapic_state)); | |
429 | break; | |
430 | default: | |
431 | r = -EINVAL; | |
432 | break; | |
433 | } | |
434 | kvm_pic_update_irq(pic_irqchip(kvm)); | |
435 | return r; | |
436 | } | |
437 | ||
438 | long kvm_arch_vm_ioctl(struct file *filp, | |
439 | unsigned int ioctl, unsigned long arg) | |
440 | { | |
441 | struct kvm *kvm = filp->private_data; | |
442 | void __user *argp = (void __user *)arg; | |
443 | int r = -EINVAL; | |
444 | ||
445 | switch (ioctl) { | |
446 | case KVM_SET_TSS_ADDR: | |
447 | r = kvm_vm_ioctl_set_tss_addr(kvm, arg); | |
448 | if (r < 0) | |
449 | goto out; | |
450 | break; | |
451 | case KVM_SET_MEMORY_REGION: { | |
452 | struct kvm_memory_region kvm_mem; | |
453 | struct kvm_userspace_memory_region kvm_userspace_mem; | |
454 | ||
455 | r = -EFAULT; | |
456 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | |
457 | goto out; | |
458 | kvm_userspace_mem.slot = kvm_mem.slot; | |
459 | kvm_userspace_mem.flags = kvm_mem.flags; | |
460 | kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; | |
461 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | |
462 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); | |
463 | if (r) | |
464 | goto out; | |
465 | break; | |
466 | } | |
467 | case KVM_SET_NR_MMU_PAGES: | |
468 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); | |
469 | if (r) | |
470 | goto out; | |
471 | break; | |
472 | case KVM_GET_NR_MMU_PAGES: | |
473 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | |
474 | break; | |
475 | case KVM_SET_MEMORY_ALIAS: { | |
476 | struct kvm_memory_alias alias; | |
477 | ||
478 | r = -EFAULT; | |
479 | if (copy_from_user(&alias, argp, sizeof alias)) | |
480 | goto out; | |
481 | r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); | |
482 | if (r) | |
483 | goto out; | |
484 | break; | |
485 | } | |
486 | case KVM_CREATE_IRQCHIP: | |
487 | r = -ENOMEM; | |
488 | kvm->vpic = kvm_create_pic(kvm); | |
489 | if (kvm->vpic) { | |
490 | r = kvm_ioapic_init(kvm); | |
491 | if (r) { | |
492 | kfree(kvm->vpic); | |
493 | kvm->vpic = NULL; | |
494 | goto out; | |
495 | } | |
496 | } else | |
497 | goto out; | |
498 | break; | |
499 | case KVM_IRQ_LINE: { | |
500 | struct kvm_irq_level irq_event; | |
501 | ||
502 | r = -EFAULT; | |
503 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | |
504 | goto out; | |
505 | if (irqchip_in_kernel(kvm)) { | |
506 | mutex_lock(&kvm->lock); | |
507 | if (irq_event.irq < 16) | |
508 | kvm_pic_set_irq(pic_irqchip(kvm), | |
509 | irq_event.irq, | |
510 | irq_event.level); | |
511 | kvm_ioapic_set_irq(kvm->vioapic, | |
512 | irq_event.irq, | |
513 | irq_event.level); | |
514 | mutex_unlock(&kvm->lock); | |
515 | r = 0; | |
516 | } | |
517 | break; | |
518 | } | |
519 | case KVM_GET_IRQCHIP: { | |
520 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | |
521 | struct kvm_irqchip chip; | |
522 | ||
523 | r = -EFAULT; | |
524 | if (copy_from_user(&chip, argp, sizeof chip)) | |
525 | goto out; | |
526 | r = -ENXIO; | |
527 | if (!irqchip_in_kernel(kvm)) | |
528 | goto out; | |
529 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | |
530 | if (r) | |
531 | goto out; | |
532 | r = -EFAULT; | |
533 | if (copy_to_user(argp, &chip, sizeof chip)) | |
534 | goto out; | |
535 | r = 0; | |
536 | break; | |
537 | } | |
538 | case KVM_SET_IRQCHIP: { | |
539 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | |
540 | struct kvm_irqchip chip; | |
541 | ||
542 | r = -EFAULT; | |
543 | if (copy_from_user(&chip, argp, sizeof chip)) | |
544 | goto out; | |
545 | r = -ENXIO; | |
546 | if (!irqchip_in_kernel(kvm)) | |
547 | goto out; | |
548 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | |
549 | if (r) | |
550 | goto out; | |
551 | r = 0; | |
552 | break; | |
553 | } | |
554 | default: | |
555 | ; | |
556 | } | |
557 | out: | |
558 | return r; | |
559 | } | |
560 | ||
043405e1 CO |
561 | static __init void kvm_init_msr_list(void) |
562 | { | |
563 | u32 dummy[2]; | |
564 | unsigned i, j; | |
565 | ||
566 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | |
567 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | |
568 | continue; | |
569 | if (j < i) | |
570 | msrs_to_save[j] = msrs_to_save[i]; | |
571 | j++; | |
572 | } | |
573 | num_msrs_to_save = j; | |
574 | } | |
575 | ||
/* Arch init entry point: builds the host-filtered MSR list. */
__init void kvm_arch_init(void)
{
	kvm_init_msr_list();
}