]>
Commit | Line | Data |
---|---|---|
05330448 AL |
1 | /* |
2 | * QEMU KVM support | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <aliguori@us.ibm.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <sys/types.h> | |
15 | #include <sys/ioctl.h> | |
16 | #include <sys/mman.h> | |
984b5181 | 17 | #include <stdarg.h> |
05330448 AL |
18 | |
19 | #include <linux/kvm.h> | |
20 | ||
21 | #include "qemu-common.h" | |
22 | #include "sysemu.h" | |
23 | #include "kvm.h" | |
24 | ||
/* Debug tracing switch: define DEBUG_KVM to enable dprintf() output. */
/* #define DEBUG_KVM */

/*
 * dprintf(): printf-style trace helper.
 *
 * Without DEBUG_KVM it expands to an empty statement, so its arguments are
 * never evaluated.  With DEBUG_KVM it forwards the format and arguments to
 * fprintf(stderr, ...).
 */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
34 | ||
35 | typedef struct kvm_userspace_memory_region KVMSlot; | |
36 | ||
37 | int kvm_allowed = 0; | |
38 | ||
39 | struct KVMState | |
40 | { | |
41 | KVMSlot slots[32]; | |
42 | int fd; | |
43 | int vmfd; | |
44 | }; | |
45 | ||
46 | static KVMState *kvm_state; | |
47 | ||
48 | static KVMSlot *kvm_alloc_slot(KVMState *s) | |
49 | { | |
50 | int i; | |
51 | ||
52 | for (i = 0; i < ARRAY_SIZE(s->slots); i++) { | |
62d60e8c AL |
53 | /* KVM private memory slots */ |
54 | if (i >= 8 && i < 12) | |
55 | continue; | |
05330448 AL |
56 | if (s->slots[i].memory_size == 0) |
57 | return &s->slots[i]; | |
58 | } | |
59 | ||
60 | return NULL; | |
61 | } | |
62 | ||
63 | static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr) | |
64 | { | |
65 | int i; | |
66 | ||
67 | for (i = 0; i < ARRAY_SIZE(s->slots); i++) { | |
68 | KVMSlot *mem = &s->slots[i]; | |
69 | ||
70 | if (start_addr >= mem->guest_phys_addr && | |
71 | start_addr < (mem->guest_phys_addr + mem->memory_size)) | |
72 | return mem; | |
73 | } | |
74 | ||
75 | return NULL; | |
76 | } | |
77 | ||
78 | int kvm_init_vcpu(CPUState *env) | |
79 | { | |
80 | KVMState *s = kvm_state; | |
81 | long mmap_size; | |
82 | int ret; | |
83 | ||
84 | dprintf("kvm_init_vcpu\n"); | |
85 | ||
984b5181 | 86 | ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index); |
05330448 AL |
87 | if (ret < 0) { |
88 | dprintf("kvm_create_vcpu failed\n"); | |
89 | goto err; | |
90 | } | |
91 | ||
92 | env->kvm_fd = ret; | |
93 | env->kvm_state = s; | |
94 | ||
95 | mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); | |
96 | if (mmap_size < 0) { | |
97 | dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n"); | |
98 | goto err; | |
99 | } | |
100 | ||
101 | env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, | |
102 | env->kvm_fd, 0); | |
103 | if (env->kvm_run == MAP_FAILED) { | |
104 | ret = -errno; | |
105 | dprintf("mmap'ing vcpu state failed\n"); | |
106 | goto err; | |
107 | } | |
108 | ||
109 | ret = kvm_arch_init_vcpu(env); | |
110 | ||
111 | err: | |
112 | return ret; | |
113 | } | |
114 | ||
115 | int kvm_init(int smp_cpus) | |
116 | { | |
117 | KVMState *s; | |
118 | int ret; | |
119 | int i; | |
120 | ||
121 | if (smp_cpus > 1) | |
122 | return -EINVAL; | |
123 | ||
124 | s = qemu_mallocz(sizeof(KVMState)); | |
125 | if (s == NULL) | |
126 | return -ENOMEM; | |
127 | ||
128 | for (i = 0; i < ARRAY_SIZE(s->slots); i++) | |
129 | s->slots[i].slot = i; | |
130 | ||
131 | s->vmfd = -1; | |
132 | s->fd = open("/dev/kvm", O_RDWR); | |
133 | if (s->fd == -1) { | |
134 | fprintf(stderr, "Could not access KVM kernel module: %m\n"); | |
135 | ret = -errno; | |
136 | goto err; | |
137 | } | |
138 | ||
139 | ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); | |
140 | if (ret < KVM_API_VERSION) { | |
141 | if (ret > 0) | |
142 | ret = -EINVAL; | |
143 | fprintf(stderr, "kvm version too old\n"); | |
144 | goto err; | |
145 | } | |
146 | ||
147 | if (ret > KVM_API_VERSION) { | |
148 | ret = -EINVAL; | |
149 | fprintf(stderr, "kvm version not supported\n"); | |
150 | goto err; | |
151 | } | |
152 | ||
153 | s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0); | |
154 | if (s->vmfd < 0) | |
155 | goto err; | |
156 | ||
157 | /* initially, KVM allocated its own memory and we had to jump through | |
158 | * hooks to make phys_ram_base point to this. Modern versions of KVM | |
159 | * just use a user allocated buffer so we can use phys_ram_base | |
160 | * unmodified. Make sure we have a sufficiently modern version of KVM. | |
161 | */ | |
984b5181 | 162 | ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); |
05330448 AL |
163 | if (ret <= 0) { |
164 | if (ret == 0) | |
165 | ret = -EINVAL; | |
166 | fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n"); | |
167 | goto err; | |
168 | } | |
169 | ||
170 | ret = kvm_arch_init(s, smp_cpus); | |
171 | if (ret < 0) | |
172 | goto err; | |
173 | ||
174 | kvm_state = s; | |
175 | ||
176 | return 0; | |
177 | ||
178 | err: | |
179 | if (s) { | |
180 | if (s->vmfd != -1) | |
181 | close(s->vmfd); | |
182 | if (s->fd != -1) | |
183 | close(s->fd); | |
184 | } | |
185 | qemu_free(s); | |
186 | ||
187 | return ret; | |
188 | } | |
189 | ||
190 | static int kvm_handle_io(CPUState *env, uint16_t port, void *data, | |
191 | int direction, int size, uint32_t count) | |
192 | { | |
193 | int i; | |
194 | uint8_t *ptr = data; | |
195 | ||
196 | for (i = 0; i < count; i++) { | |
197 | if (direction == KVM_EXIT_IO_IN) { | |
198 | switch (size) { | |
199 | case 1: | |
200 | stb_p(ptr, cpu_inb(env, port)); | |
201 | break; | |
202 | case 2: | |
203 | stw_p(ptr, cpu_inw(env, port)); | |
204 | break; | |
205 | case 4: | |
206 | stl_p(ptr, cpu_inl(env, port)); | |
207 | break; | |
208 | } | |
209 | } else { | |
210 | switch (size) { | |
211 | case 1: | |
212 | cpu_outb(env, port, ldub_p(ptr)); | |
213 | break; | |
214 | case 2: | |
215 | cpu_outw(env, port, lduw_p(ptr)); | |
216 | break; | |
217 | case 4: | |
218 | cpu_outl(env, port, ldl_p(ptr)); | |
219 | break; | |
220 | } | |
221 | } | |
222 | ||
223 | ptr += size; | |
224 | } | |
225 | ||
226 | return 1; | |
227 | } | |
228 | ||
229 | int kvm_cpu_exec(CPUState *env) | |
230 | { | |
231 | struct kvm_run *run = env->kvm_run; | |
232 | int ret; | |
233 | ||
234 | dprintf("kvm_cpu_exec()\n"); | |
235 | ||
236 | do { | |
237 | kvm_arch_pre_run(env, run); | |
238 | ||
239 | if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { | |
240 | dprintf("interrupt exit requested\n"); | |
241 | ret = 0; | |
242 | break; | |
243 | } | |
244 | ||
245 | ret = kvm_vcpu_ioctl(env, KVM_RUN, 0); | |
246 | kvm_arch_post_run(env, run); | |
247 | ||
248 | if (ret == -EINTR || ret == -EAGAIN) { | |
249 | dprintf("io window exit\n"); | |
250 | ret = 0; | |
251 | break; | |
252 | } | |
253 | ||
254 | if (ret < 0) { | |
255 | dprintf("kvm run failed %s\n", strerror(-ret)); | |
256 | abort(); | |
257 | } | |
258 | ||
259 | ret = 0; /* exit loop */ | |
260 | switch (run->exit_reason) { | |
261 | case KVM_EXIT_IO: | |
262 | dprintf("handle_io\n"); | |
263 | ret = kvm_handle_io(env, run->io.port, | |
264 | (uint8_t *)run + run->io.data_offset, | |
265 | run->io.direction, | |
266 | run->io.size, | |
267 | run->io.count); | |
268 | break; | |
269 | case KVM_EXIT_MMIO: | |
270 | dprintf("handle_mmio\n"); | |
271 | cpu_physical_memory_rw(run->mmio.phys_addr, | |
272 | run->mmio.data, | |
273 | run->mmio.len, | |
274 | run->mmio.is_write); | |
275 | ret = 1; | |
276 | break; | |
277 | case KVM_EXIT_IRQ_WINDOW_OPEN: | |
278 | dprintf("irq_window_open\n"); | |
279 | break; | |
280 | case KVM_EXIT_SHUTDOWN: | |
281 | dprintf("shutdown\n"); | |
282 | qemu_system_reset_request(); | |
283 | ret = 1; | |
284 | break; | |
285 | case KVM_EXIT_UNKNOWN: | |
286 | dprintf("kvm_exit_unknown\n"); | |
287 | break; | |
288 | case KVM_EXIT_FAIL_ENTRY: | |
289 | dprintf("kvm_exit_fail_entry\n"); | |
290 | break; | |
291 | case KVM_EXIT_EXCEPTION: | |
292 | dprintf("kvm_exit_exception\n"); | |
293 | break; | |
294 | case KVM_EXIT_DEBUG: | |
295 | dprintf("kvm_exit_debug\n"); | |
296 | break; | |
297 | default: | |
298 | dprintf("kvm_arch_handle_exit\n"); | |
299 | ret = kvm_arch_handle_exit(env, run); | |
300 | break; | |
301 | } | |
302 | } while (ret > 0); | |
303 | ||
becfc390 AL |
304 | if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { |
305 | env->interrupt_request &= ~CPU_INTERRUPT_EXIT; | |
306 | env->exception_index = EXCP_INTERRUPT; | |
307 | } | |
308 | ||
05330448 AL |
309 | return ret; |
310 | } | |
311 | ||
312 | void kvm_set_phys_mem(target_phys_addr_t start_addr, | |
313 | ram_addr_t size, | |
314 | ram_addr_t phys_offset) | |
315 | { | |
316 | KVMState *s = kvm_state; | |
317 | ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; | |
318 | KVMSlot *mem; | |
319 | ||
320 | /* KVM does not support read-only slots */ | |
321 | phys_offset &= ~IO_MEM_ROM; | |
322 | ||
323 | mem = kvm_lookup_slot(s, start_addr); | |
324 | if (mem) { | |
a3d6841f | 325 | if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) { |
05330448 AL |
326 | mem->memory_size = 0; |
327 | mem->guest_phys_addr = start_addr; | |
328 | mem->userspace_addr = 0; | |
329 | mem->flags = 0; | |
330 | ||
331 | kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); | |
332 | } else if (start_addr >= mem->guest_phys_addr && | |
62d60e8c AL |
333 | (start_addr + size) <= (mem->guest_phys_addr + |
334 | mem->memory_size)) { | |
335 | KVMSlot slot; | |
336 | target_phys_addr_t mem_start; | |
337 | ram_addr_t mem_size, mem_offset; | |
338 | ||
339 | /* Not splitting */ | |
340 | if ((phys_offset - (start_addr - mem->guest_phys_addr)) == | |
341 | ((uint8_t *)mem->userspace_addr - phys_ram_base)) | |
342 | return; | |
343 | ||
344 | /* unregister whole slot */ | |
345 | memcpy(&slot, mem, sizeof(slot)); | |
346 | mem->memory_size = 0; | |
347 | kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); | |
348 | ||
349 | /* register prefix slot */ | |
350 | mem_start = slot.guest_phys_addr; | |
351 | mem_size = start_addr - slot.guest_phys_addr; | |
352 | mem_offset = (uint8_t *)slot.userspace_addr - phys_ram_base; | |
353 | if (mem_size) | |
354 | kvm_set_phys_mem(mem_start, mem_size, mem_offset); | |
355 | ||
356 | /* register new slot */ | |
357 | kvm_set_phys_mem(start_addr, size, phys_offset); | |
358 | ||
359 | /* register suffix slot */ | |
360 | mem_start = start_addr + size; | |
361 | mem_offset += mem_size + size; | |
362 | mem_size = slot.memory_size - mem_size - size; | |
363 | if (mem_size) | |
364 | kvm_set_phys_mem(mem_start, mem_size, mem_offset); | |
365 | ||
05330448 | 366 | return; |
62d60e8c AL |
367 | } else { |
368 | printf("Registering overlapping slot\n"); | |
369 | abort(); | |
370 | } | |
05330448 | 371 | } |
05330448 AL |
372 | /* KVM does not need to know about this memory */ |
373 | if (flags >= IO_MEM_UNASSIGNED) | |
374 | return; | |
375 | ||
376 | mem = kvm_alloc_slot(s); | |
377 | mem->memory_size = size; | |
378 | mem->guest_phys_addr = start_addr; | |
379 | mem->userspace_addr = (unsigned long)(phys_ram_base + phys_offset); | |
380 | mem->flags = 0; | |
381 | ||
382 | kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); | |
383 | /* FIXME deal with errors */ | |
384 | } | |
385 | ||
984b5181 | 386 | int kvm_ioctl(KVMState *s, int type, ...) |
05330448 AL |
387 | { |
388 | int ret; | |
984b5181 AL |
389 | void *arg; |
390 | va_list ap; | |
05330448 | 391 | |
984b5181 AL |
392 | va_start(ap, type); |
393 | arg = va_arg(ap, void *); | |
394 | va_end(ap); | |
395 | ||
396 | ret = ioctl(s->fd, type, arg); | |
05330448 AL |
397 | if (ret == -1) |
398 | ret = -errno; | |
399 | ||
400 | return ret; | |
401 | } | |
402 | ||
984b5181 | 403 | int kvm_vm_ioctl(KVMState *s, int type, ...) |
05330448 AL |
404 | { |
405 | int ret; | |
984b5181 AL |
406 | void *arg; |
407 | va_list ap; | |
408 | ||
409 | va_start(ap, type); | |
410 | arg = va_arg(ap, void *); | |
411 | va_end(ap); | |
05330448 | 412 | |
984b5181 | 413 | ret = ioctl(s->vmfd, type, arg); |
05330448 AL |
414 | if (ret == -1) |
415 | ret = -errno; | |
416 | ||
417 | return ret; | |
418 | } | |
419 | ||
984b5181 | 420 | int kvm_vcpu_ioctl(CPUState *env, int type, ...) |
05330448 AL |
421 | { |
422 | int ret; | |
984b5181 AL |
423 | void *arg; |
424 | va_list ap; | |
425 | ||
426 | va_start(ap, type); | |
427 | arg = va_arg(ap, void *); | |
428 | va_end(ap); | |
05330448 | 429 | |
984b5181 | 430 | ret = ioctl(env->kvm_fd, type, arg); |
05330448 AL |
431 | if (ret == -1) |
432 | ret = -errno; | |
433 | ||
434 | return ret; | |
435 | } |