/*
 * ARM implementation of KVM hooks
 *
 * Copyright Christoffer Dall 2009-2010
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qom/object.h"
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "cpu.h"
#include "trace.h"
#include "internals.h"
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "hw/irq.h"
#include "qapi/visitor.h"
#include "qemu/log.h"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static bool cap_has_mp_state;
static bool cap_has_inject_serror_esr;
static bool cap_has_inject_ext_dabt;

static ARMHostCPUFeatures arm_host_cpu_features;

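/*
 * Initialize (or re-initialize) the VCPU via the KVM_ARM_VCPU_INIT
 * ioctl, using the target CPU type and feature flags recorded in the
 * ARMCPU state.
 */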
int kvm_arm_vcpu_init(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    struct kvm_vcpu_init init;

    init.target = cpu->kvm_target;
    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));

    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
}

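/*
 * Finalize a configurable VCPU feature (e.g. SVE) via
 * KVM_ARM_VCPU_FINALIZE; afterwards that feature's configuration
 * can no longer be changed.
 */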
int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
{
    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
}

void kvm_arm_init_serror_injection(CPUState *cs)
{
    cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
                                    KVM_CAP_ARM_INJECT_SERROR_ESR);
}

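/*
 * Create a scratch VM and VCPU for probing host capabilities without
 * touching any real VM. On success the /dev/kvm, VM and VCPU file
 * descriptors are returned in fdarray[0..2]; the caller releases them
 * with kvm_arm_destroy_scratch_host_vcpu().
 */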
bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                      int *fdarray,
                                      struct kvm_vcpu_init *init)
{
    int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
    int max_vm_pa_size;

    kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
    if (kvmfd < 0) {
        goto err;
    }
    max_vm_pa_size = ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
    if (max_vm_pa_size < 0) {
        max_vm_pa_size = 0;
    }
    do {
        vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size);
    } while (vmfd == -1 && errno == EINTR);
    if (vmfd < 0) {
        goto err;
    }
    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
    if (cpufd < 0) {
        goto err;
    }

    if (!init) {
        /* Caller doesn't want the VCPU to be initialized, so skip it */
        goto finish;
    }

    if (init->target == -1) {
        struct kvm_vcpu_init preferred;

        ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
        if (!ret) {
            init->target = preferred.target;
        }
    }
    if (ret >= 0) {
        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
        if (ret < 0) {
            goto err;
        }
    } else if (cpus_to_try) {
        /* Old kernel which doesn't know about the
         * PREFERRED_TARGET ioctl: we know it will only support
         * creating one kind of guest CPU which is its preferred
         * CPU type.
         */
        struct kvm_vcpu_init try;

        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
            try.target = *cpus_to_try++;
            memcpy(try.features, init->features, sizeof(init->features));
            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
            if (ret >= 0) {
                break;
            }
        }
        if (ret < 0) {
            goto err;
        }
        init->target = try.target;
    } else {
        /* Treat a NULL cpus_to_try argument the same as an empty
         * list, which means we will fail the call since this must
         * be an old kernel which doesn't support PREFERRED_TARGET.
         */
        goto err;
    }

finish:
    fdarray[0] = kvmfd;
    fdarray[1] = vmfd;
    fdarray[2] = cpufd;

    return true;

err:
    if (cpufd >= 0) {
        close(cpufd);
    }
    if (vmfd >= 0) {
        close(vmfd);
    }
    if (kvmfd >= 0) {
        close(kvmfd);
    }

    return false;
}

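/*
 * Close the scratch file descriptors in reverse order: the VCPU
 * first, then the VM, then /dev/kvm itself.
 */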
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int i;

    for (i = 2; i >= 0; i--) {
        close(fdarray[i]);
    }
}

void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;

    if (!arm_host_cpu_features.dtb_compatible) {
        if (!kvm_enabled() ||
            !kvm_arm_get_host_cpu_features(&arm_host_cpu_features)) {
            /* We can't report this error yet, so flag that we need to
             * report it in arm_cpu_realizefn().
             */
            cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
            cpu->host_cpu_probe_failed = true;
            return;
        }
    }

    cpu->kvm_target = arm_host_cpu_features.target;
    cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
    cpu->isar = arm_host_cpu_features.isar;
    env->features = arm_host_cpu_features.features;
}

static bool kvm_no_adjvtime_get(Object *obj, Error **errp)
{
    return !ARM_CPU(obj)->kvm_adjvtime;
}

static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
{
    ARM_CPU(obj)->kvm_adjvtime = !value;
}

static bool kvm_steal_time_get(Object *obj, Error **errp)
{
    return ARM_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF;
}

static void kvm_steal_time_set(Object *obj, bool value, Error **errp)
{
    ARM_CPU(obj)->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}

/* KVM VCPU properties should be prefixed with "kvm-". */
void kvm_arm_add_vcpu_properties(Object *obj)
{
    ARMCPU *cpu = ARM_CPU(obj);
    CPUARMState *env = &cpu->env;

    if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
        cpu->kvm_adjvtime = true;
        object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
                                 kvm_no_adjvtime_set);
        object_property_set_description(obj, "kvm-no-adjvtime",
                                        "Set on to disable the adjustment of "
                                        "the virtual counter. VM stopped time "
                                        "will be counted.");
    }

    cpu->kvm_steal_time = ON_OFF_AUTO_AUTO;
    object_property_add_bool(obj, "kvm-steal-time", kvm_steal_time_get,
                             kvm_steal_time_set);
    object_property_set_description(obj, "kvm-steal-time",
                                    "Set off to disable KVM steal time.");
}

bool kvm_arm_pmu_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3);
}

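/*
 * Return the maximum guest physical (IPA) address width, in bits.
 * On hosts without KVM_CAP_ARM_VM_IPA_SIZE the width is fixed at the
 * legacy 40 bits, and *fixed_ipa is set accordingly.
 */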
int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
{
    KVMState *s = KVM_STATE(ms->accelerator);
    int ret;

    ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
    *fixed_ipa = ret <= 0;

    return ret > 0 ? ret : 40;
}

int kvm_arch_get_default_type(MachineState *ms)
{
    bool fixed_ipa;
    int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
    return fixed_ipa ? 0 : size;
}

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    int ret = 0;
    /* On ARM, interrupt delivery is always asynchronous,
     * whether we are using an in-kernel VGIC or not.
     */
    kvm_async_interrupts_allowed = true;

    /*
     * PSCI wakes up secondary cores, so we always need to
     * have vCPUs waiting in kernel space
     */
    kvm_halt_in_kernel_allowed = true;

    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);

    if (ms->smp.cpus > 256 &&
        !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
        error_report("Using more than 256 vcpus requires a host kernel "
                     "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
        ret = -EINVAL;
    }

    if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) {
        if (kvm_vm_enable_cap(s, KVM_CAP_ARM_NISV_TO_USER, 0)) {
            error_report("Failed to enable KVM_CAP_ARM_NISV_TO_USER cap");
        } else {
            /* Set status for supporting the external dabt injection */
            cap_has_inject_ext_dabt = kvm_check_extension(s,
                                          KVM_CAP_ARM_INJECT_EXT_DABT);
        }
    }

    if (s->kvm_eager_split_size) {
        uint32_t sizes;

        sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
        if (!sizes) {
            s->kvm_eager_split_size = 0;
            warn_report("Eager Page Split support not available");
        } else if (!(s->kvm_eager_split_size & sizes)) {
            error_report("Eager Page Split requested chunk size not valid");
            ret = -EINVAL;
        } else {
            ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0,
                                    s->kvm_eager_split_size);
            if (ret < 0) {
                error_report("Enabling of Eager Page Split failed: %s",
                             strerror(-ret));
            }
        }
    }

    max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);

    return ret;
}

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

/* We track all the KVM devices which need their memory addresses
 * passed to the kernel in a list of these structures.
 * When board init is complete we run through the list and
 * tell the kernel the base addresses of the memory regions.
 * We use a MemoryListener to track mapping and unmapping of
 * the regions during board creation, so the board models don't
 * need to do anything special for the KVM case.
 *
 * Sometimes the address must be OR'ed with some other fields
 * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION);
 * @kda_addr_ormask stores the value of those fields.
 */
typedef struct KVMDevice {
    struct kvm_arm_device_addr kda;
    struct kvm_device_attr kdattr;
    uint64_t kda_addr_ormask;
    MemoryRegion *mr;
    QSLIST_ENTRY(KVMDevice) entries;
    int dev_fd;
} KVMDevice;

static QSLIST_HEAD(, KVMDevice) kvm_devices_head;

static void kvm_arm_devlistener_add(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = section->offset_within_address_space;
        }
    }
}

static void kvm_arm_devlistener_del(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = -1;
        }
    }
}

static MemoryListener devlistener = {
    .name = "kvm-arm",
    .region_add = kvm_arm_devlistener_add,
    .region_del = kvm_arm_devlistener_del,
    .priority = MEMORY_LISTENER_PRIORITY_MIN,
};

static void kvm_arm_set_device_addr(KVMDevice *kd)
{
    struct kvm_device_attr *attr = &kd->kdattr;
    int ret;

    /* If the device control API is available and we have a device fd on the
     * KVMDevice struct, let's use the newer API
     */
    if (kd->dev_fd >= 0) {
        uint64_t addr = kd->kda.addr;

        addr |= kd->kda_addr_ormask;
        attr->addr = (uintptr_t)&addr;
        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
    } else {
        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
    }

    if (ret < 0) {
        fprintf(stderr, "Failed to set device address: %s\n",
                strerror(-ret));
        abort();
    }
}

static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
{
    KVMDevice *kd, *tkd;

    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
        if (kd->kda.addr != -1) {
            kvm_arm_set_device_addr(kd);
        }
        memory_region_unref(kd->mr);
        QSLIST_REMOVE_HEAD(&kvm_devices_head, entries);
        g_free(kd);
    }
    memory_listener_unregister(&devlistener);
}

static Notifier notify = {
    .notify = kvm_arm_machine_init_done,
};

void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
                             uint64_t attr, int dev_fd, uint64_t addr_ormask)
{
    KVMDevice *kd;

    if (!kvm_irqchip_in_kernel()) {
        return;
    }

    if (QSLIST_EMPTY(&kvm_devices_head)) {
        memory_listener_register(&devlistener, &address_space_memory);
        qemu_add_machine_init_done_notifier(&notify);
    }
    kd = g_new0(KVMDevice, 1);
    kd->mr = mr;
    kd->kda.id = devid;
    kd->kda.addr = -1;
    kd->kdattr.flags = 0;
    kd->kdattr.group = group;
    kd->kdattr.attr = attr;
    kd->dev_fd = dev_fd;
    kd->kda_addr_ormask = addr_ormask;
    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
    memory_region_ref(kd->mr);
}

static int compare_u64(const void *a, const void *b)
{
    if (*(uint64_t *)a > *(uint64_t *)b) {
        return 1;
    }
    if (*(uint64_t *)a < *(uint64_t *)b) {
        return -1;
    }
    return 0;
}

/*
 * cpreg_values are sorted in ascending order by KVM register ID
 * (see kvm_arm_init_cpreg_list). This allows us to cheaply find
 * the storage for a KVM register by ID with a binary search.
 */
static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx)
{
    uint64_t *res;

    res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len,
                  sizeof(uint64_t), compare_u64);
    assert(res);

    return &cpu->cpreg_values[res - cpu->cpreg_indexes];
}

/* Initialize the ARMCPU cpreg list according to the kernel's
 * definition of what CPU registers it knows about (and throw away
 * the previous TCG-created cpreg list).
 */
int kvm_arm_init_cpreg_list(ARMCPU *cpu)
{
    struct kvm_reg_list rl;
    struct kvm_reg_list *rlp;
    int i, ret, arraylen;
    CPUState *cs = CPU(cpu);

    rl.n = 0;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
    if (ret != -E2BIG) {
        return ret;
    }
    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
    rlp->n = rl.n;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
    if (ret) {
        goto out;
    }
    /* Sort the list we get back from the kernel, since cpreg_tuples
     * must be in strictly ascending order.
     */
    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
            continue;
        }
        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
        case KVM_REG_SIZE_U64:
            break;
        default:
            fprintf(stderr, "Can't handle size of register in kernel list\n");
            ret = -EINVAL;
            goto out;
        }

        arraylen++;
    }

    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
                                         arraylen);
    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
                                        arraylen);
    cpu->cpreg_array_len = arraylen;
    cpu->cpreg_vmstate_array_len = arraylen;

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        uint64_t regidx = rlp->reg[i];
        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
            continue;
        }
        cpu->cpreg_indexes[arraylen] = regidx;
        arraylen++;
    }
    assert(cpu->cpreg_array_len == arraylen);

    if (!write_kvmstate_to_list(cpu)) {
        /* Shouldn't happen unless kernel is inconsistent about
         * what registers exist.
         */
        fprintf(stderr, "Initial read of kernel register state failed\n");
        ret = -EINVAL;
        goto out;
    }

out:
    g_free(rlp);
    return ret;
}

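/*
 * Read every register in the cpreg list back from KVM into the
 * cpreg_values array; returns true only if all reads succeed.
 */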
bool write_kvmstate_to_list(ARMCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            ret = kvm_get_one_reg(cs, regidx, &v32);
            if (!ret) {
                cpu->cpreg_values[i] = v32;
            }
            break;
        case KVM_REG_SIZE_U64:
            ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i);
            break;
        default:
            g_assert_not_reached();
        }
        if (ret) {
            ok = false;
        }
    }
    return ok;
}

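/*
 * Write the values in the cpreg list out to KVM, skipping any
 * register whose sync level is above @level; returns true only if
 * all writes succeed.
 */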
bool write_list_to_kvmstate(ARMCPU *cpu, int level)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        if (kvm_arm_cpreg_level(regidx) > level) {
            continue;
        }

        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            v32 = cpu->cpreg_values[i];
            ret = kvm_set_one_reg(cs, regidx, &v32);
            break;
        case KVM_REG_SIZE_U64:
            ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i);
            break;
        default:
            g_assert_not_reached();
        }
        if (ret) {
            /* We might fail for "unknown register" and also for
             * "you tried to set a register which is constant with
             * a different value from what it actually contains".
             */
            ok = false;
        }
    }
    return ok;
}

void kvm_arm_cpu_pre_save(ARMCPU *cpu)
{
    /* KVM virtual time adjustment */
    if (cpu->kvm_vtime_dirty) {
        *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime;
    }
}

void kvm_arm_cpu_post_load(ARMCPU *cpu)
{
    /* KVM virtual time adjustment */
    if (cpu->kvm_adjvtime) {
        cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT);
        cpu->kvm_vtime_dirty = true;
    }
}

void kvm_arm_reset_vcpu(ARMCPU *cpu)
{
    int ret;

    /* Re-init VCPU so that all registers are set to
     * their respective reset values.
     */
    ret = kvm_arm_vcpu_init(CPU(cpu));
    if (ret < 0) {
        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
        abort();
    }
    if (!write_kvmstate_to_list(cpu)) {
        fprintf(stderr, "write_kvmstate_to_list failed\n");
        abort();
    }
    /*
     * Sync the reset values also into the CPUState. This is necessary
     * because the next thing we do will be a kvm_arch_put_registers()
     * which will update the list values from the CPUState before copying
     * the list values back to KVM. It's OK to ignore failure returns here
     * for the same reason we do so in kvm_arch_get_registers().
     */
    write_list_to_cpustate(cpu);
}

/*
 * Update KVM's MP_STATE based on what QEMU thinks it is
 */
int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state = {
            .mp_state = (cpu->power_state == PSCI_OFF) ?
            KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
        };
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            return -1;
        }
    }

    return 0;
}

/*
 * Sync the KVM MP_STATE into QEMU
 */
int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state;
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            abort();
        }
        cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
            PSCI_OFF : PSCI_ON;
    }

    return 0;
}

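/*
 * Snapshot the guest's virtual counter (KVM_REG_ARM_TIMER_CNT) into
 * cpu->kvm_vtime when the VM stops, so that the stopped interval can
 * later be hidden from the guest (see kvm_arm_vm_state_change).
 */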
void kvm_arm_get_virtual_time(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    int ret;

    if (cpu->kvm_vtime_dirty) {
        return;
    }

    ret = kvm_get_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime);
    if (ret) {
        error_report("Failed to get KVM_REG_ARM_TIMER_CNT");
        abort();
    }

    cpu->kvm_vtime_dirty = true;
}

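/*
 * Restore the previously snapshotted virtual counter when the VM
 * resumes, so the guest does not observe the time spent stopped.
 */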
void kvm_arm_put_virtual_time(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    int ret;

    if (!cpu->kvm_vtime_dirty) {
        return;
    }

    ret = kvm_set_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime);
    if (ret) {
        error_report("Failed to set KVM_REG_ARM_TIMER_CNT");
        abort();
    }

    cpu->kvm_vtime_dirty = false;
}

int kvm_put_vcpu_events(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    memset(&events, 0, sizeof(events));
    events.exception.serror_pending = env->serror.pending;

    /* Inject SError to guest with specified syndrome if host kernel
     * supports it, otherwise inject SError without syndrome.
     */
    if (cap_has_inject_serror_esr) {
        events.exception.serror_has_esr = env->serror.has_esr;
        events.exception.serror_esr = env->serror.esr;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
    if (ret) {
        error_report("failed to put vcpu events");
    }

    return ret;
}

int kvm_get_vcpu_events(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    memset(&events, 0, sizeof(events));
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
    if (ret) {
        error_report("failed to get vcpu events");
        return ret;
    }

    env->serror.pending = events.exception.serror_pending;
    env->serror.has_esr = events.exception.serror_has_esr;
    env->serror.esr = events.exception.serror_esr;

    return 0;
}

#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC,   bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 */
#define ESR_DFSC(aarch64, lpae, v)        \
    ((aarch64 || (lpae)) ? ((v) & 0x3F)   \
     : (((v) >> 6) | ((v) & 0x1F)))

#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)

/**
 * kvm_arm_verify_ext_dabt_pending:
 * @cs: CPUState
 *
 * Verify the fault status code wrt the Ext DABT injection
 *
 * Returns: true if the fault status code is as expected, false otherwise
 */
static bool kvm_arm_verify_ext_dabt_pending(CPUState *cs)
{
    uint64_t dfsr_val;

    if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
        ARMCPU *cpu = ARM_CPU(cs);
        CPUARMState *env = &cpu->env;
        int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
        int lpae = 0;

        if (!aarch64_mode) {
            uint64_t ttbcr;

            if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
                lpae = arm_feature(env, ARM_FEATURE_LPAE)
                        && (ttbcr & TTBCR_EAE);
            }
        }
        /*
         * The verification here is based on the DFSC bits
         * of the ESR_EL1 reg only
         */
        return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
                ESR_DFSC_EXTABT(aarch64_mode, lpae));
    }
    return false;
}

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (unlikely(env->ext_dabt_raised)) {
        /*
         * Verify that the external DABT has been properly injected;
         * otherwise we risk indefinitely re-running the faulting
         * instruction. This covers a very narrow case for kernels
         * 5.5..5.5.4 where the injected abort was misconfigured as
         * an IMPLEMENTATION DEFINED exception (for 32-bit EL1).
         */
        if (!arm_feature(env, ARM_FEATURE_AARCH64) &&
            unlikely(!kvm_arm_verify_ext_dabt_pending(cs))) {

            error_report("Data abort exception with no valid ISS generated by "
                         "guest memory access. KVM unable to emulate faulting "
                         "instruction. Failed to inject an external data abort "
                         "into the guest.");
            abort();
        }
        /* Clear the status */
        env->ext_dabt_raised = 0;
    }
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    ARMCPU *cpu;
    uint32_t switched_level;

    if (kvm_irqchip_in_kernel()) {
        /*
         * We only need to sync timer states with user-space interrupt
         * controllers, so return early and save cycles if we don't.
         */
        return MEMTXATTRS_UNSPECIFIED;
    }

    cpu = ARM_CPU(cs);

    /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */
    if (run->s.regs.device_irq_level != cpu->device_irq_level) {
        switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;

        qemu_mutex_lock_iothread();

        if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_VTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_VTIMER;
        }

        if (switched_level & KVM_ARM_DEV_EL1_PTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_PTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_PTIMER;
        }

        if (switched_level & KVM_ARM_DEV_PMU) {
            qemu_set_irq(cpu->pmu_interrupt,
                         !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU));
            switched_level &= ~KVM_ARM_DEV_PMU;
        }

        if (switched_level) {
            qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n",
                          __func__, switched_level);
        }

        /* We also mark unknown levels as processed to not waste cycles */
        cpu->device_irq_level = run->s.regs.device_irq_level;
        qemu_mutex_unlock_iothread();
    }

    return MEMTXATTRS_UNSPECIFIED;
}

void kvm_arm_vm_state_change(void *opaque, bool running, RunState state)
{
    CPUState *cs = opaque;
    ARMCPU *cpu = ARM_CPU(cs);

    if (running) {
        if (cpu->kvm_adjvtime) {
            kvm_arm_put_virtual_time(cs);
        }
    } else {
        if (cpu->kvm_adjvtime) {
            kvm_arm_get_virtual_time(cs);
        }
    }
}

/**
 * kvm_arm_handle_dabt_nisv:
 * @cs: CPUState
 * @esr_iss: ISS encoding (limited) for the exception from Data Abort
 *           ISV bit set to '0b0' -> no valid instruction syndrome
 * @fault_ipa: faulting address for the synchronous data abort
 *
 * Returns: 0 if the exception has been handled, < 0 otherwise
 */
static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss,
                                    uint64_t fault_ipa)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    /*
     * Request KVM to inject the external data abort into the guest
     */
    if (cap_has_inject_ext_dabt) {
        struct kvm_vcpu_events events = { };
        /*
         * The external data abort event will be handled immediately by KVM
         * using the address fault that triggered the exit on given VCPU.
         * Requesting injection of the external data abort does not rely
         * on any other VCPU state. Therefore, in this particular case, the VCPU
         * synchronization can be exceptionally skipped.
         */
        events.exception.ext_dabt_pending = 1;
        /* KVM_CAP_ARM_INJECT_EXT_DABT implies KVM_CAP_VCPU_EVENTS */
        if (!kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events)) {
            env->ext_dabt_raised = 1;
            return 0;
        }
    } else {
        error_report("Data abort exception triggered by guest memory access "
                     "at physical address: 0x" TARGET_FMT_lx,
                     (target_ulong)fault_ipa);
        error_printf("KVM unable to emulate faulting instruction.\n");
    }
    return -1;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_DEBUG:
        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
            ret = EXCP_DEBUG;
        } /* otherwise return to guest */
        break;
    case KVM_EXIT_ARM_NISV:
        /* External DABT with no valid iss to decode */
        ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss,
                                       run->arm_nisv.fault_ipa);
        break;
    default:
        qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
                      __func__, run->exit_reason);
        break;
    }
    return ret;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    return true;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }
    if (kvm_arm_hw_debug_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
        kvm_arm_copy_hw_debug_data(&dbg->arch);
    }
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_irqchip_create(KVMState *s)
{
    if (kvm_kernel_irqchip_split()) {
        error_report("-machine kernel_irqchip=split is not supported on ARM.");
        exit(1);
    }

    /* If we can create the VGIC using the newer device control API, we
     * let the device do this when it initializes itself, otherwise we
     * fall back to the old API */
    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
}

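/*
 * Probe which VGIC versions the host can provide by asking KVM to
 * test-create each device type without actually instantiating it.
 */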
int kvm_arm_vgic_probe(void)
{
    int val = 0;

    if (kvm_create_device(kvm_state,
                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
        val |= KVM_ARM_VGIC_V3;
    }
    if (kvm_create_device(kvm_state,
                          KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
        val |= KVM_ARM_VGIC_V2;
    }
    return val;
}

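/*
 * Encode an IRQ for KVM_IRQ_LINE: the vcpu index is split into two
 * 8-bit fields (VCPU and VCPU2) so more than 256 vcpus can be
 * addressed, with the IRQ type and number in the remaining bits.
 */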
int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level)
{
    int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq;
    int cpu_idx1 = cpu % 256;
    int cpu_idx2 = cpu / 256;

    kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) |
               (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT);

    return kvm_set_irq(kvm_state, kvm_irq, !!level);
}

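/*
 * When the MSI doorbell sits behind an IOMMU, rewrite the routing
 * entry so it carries the guest-physical doorbell address that the
 * IOMMU translation resolves to.
 */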
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    AddressSpace *as = pci_device_iommu_address_space(dev);
    hwaddr xlat, len, doorbell_gpa;
    MemoryRegionSection mrs;
    MemoryRegion *mr;

    if (as == &address_space_memory) {
        return 0;
    }

    /* MSI doorbell address is translated by an IOMMU */

    RCU_READ_LOCK_GUARD();

    mr = address_space_translate(as, address, &xlat, &len, true,
                                 MEMTXATTRS_UNSPECIFIED);

    if (!mr) {
        return 1;
    }

    mrs = memory_region_find(mr, xlat, 1);

    if (!mrs.mr) {
        return 1;
    }

    doorbell_gpa = mrs.offset_within_address_space;
    memory_region_unref(mrs.mr);

    route->u.msi.address_lo = doorbell_gpa;
    route->u.msi.address_hi = doorbell_gpa >> 32;

    trace_kvm_arm_fixup_msi_route(address, doorbell_gpa);

    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return (data - 32) & 0xffff;
}

bool kvm_arch_cpu_check_are_resettable(void)
{
    return true;
}

static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    KVMState *s = KVM_STATE(obj);
    uint64_t value = s->kvm_eager_split_size;

    visit_type_size(v, name, &value, errp);
}

static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    KVMState *s = KVM_STATE(obj);
    uint64_t value;

    if (s->fd != -1) {
        error_setg(errp, "Unable to set eager-split-size after KVM has been initialized");
        return;
    }

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }

    if (value && !is_power_of_2(value)) {
        error_setg(errp, "eager-split-size must be a power of two");
        return;
    }

    s->kvm_eager_split_size = value;
}

void kvm_arch_accel_class_init(ObjectClass *oc)
{
    object_class_property_add(oc, "eager-split-size", "size",
                              kvm_arch_get_eager_split_size,
                              kvm_arch_set_eager_split_size, NULL, NULL);

    object_class_property_set_description(oc, "eager-split-size",
        "Eager Page Split chunk size for hugepages. (default: 0, disabled)");
}