]>
Commit | Line | Data |
---|---|---|
494b00c7 CD |
1 | /* |
2 | * ARM implementation of KVM hooks | |
3 | * | |
4 | * Copyright Christoffer Dall 2009-2010 | |
de3c9601 RH |
5 | * Copyright Mian-M. Hamayun 2013, Virtual Open Systems |
6 | * Copyright Alex Bennée 2014, Linaro | |
494b00c7 CD |
7 | * |
8 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
9 | * See the COPYING file in the top-level directory. | |
10 | * | |
11 | */ | |
12 | ||
74c21bd0 | 13 | #include "qemu/osdep.h" |
494b00c7 | 14 | #include <sys/ioctl.h> |
494b00c7 CD |
15 | |
16 | #include <linux/kvm.h> | |
17 | ||
494b00c7 | 18 | #include "qemu/timer.h" |
2ecb2027 | 19 | #include "qemu/error-report.h" |
db725815 | 20 | #include "qemu/main-loop.h" |
dea101a1 AJ |
21 | #include "qom/object.h" |
22 | #include "qapi/error.h" | |
494b00c7 | 23 | #include "sysemu/sysemu.h" |
de3c9601 | 24 | #include "sysemu/runstate.h" |
494b00c7 | 25 | #include "sysemu/kvm.h" |
a27382e2 | 26 | #include "sysemu/kvm_int.h" |
eb035b48 | 27 | #include "kvm_arm.h" |
494b00c7 | 28 | #include "cpu.h" |
b05c81d2 | 29 | #include "trace.h" |
38df27c8 | 30 | #include "internals.h" |
b05c81d2 | 31 | #include "hw/pci/pci.h" |
4c663752 | 32 | #include "exec/memattrs.h" |
4344af65 | 33 | #include "exec/address-spaces.h" |
de3c9601 | 34 | #include "exec/gdbstub.h" |
15eafc2e | 35 | #include "hw/boards.h" |
64552b6b | 36 | #include "hw/irq.h" |
c8f2eb5d | 37 | #include "qapi/visitor.h" |
03dd024f | 38 | #include "qemu/log.h" |
de3c9601 RH |
39 | #include "hw/acpi/acpi.h" |
40 | #include "hw/acpi/ghes.h" | |
f4f318b4 | 41 | #include "target/arm/gtimer.h" |
494b00c7 CD |
42 | |
43 | const KVMCapabilityInfo kvm_arch_required_capabilities[] = { /* list of required KVM capabilities; it contains only KVM_CAP_LAST_INFO */ | |
44 | KVM_CAP_LAST_INFO | |
45 | }; | |
46 | ||
1a1753f7 | 47 | static bool cap_has_mp_state; /* result of the KVM_CAP_MP_STATE check done in kvm_arch_init() */ |
202ccb6b | 48 | static bool cap_has_inject_serror_esr; /* result of the KVM_CAP_ARM_INJECT_SERROR_ESR check done in kvm_arch_init() */ |
694bcaa8 | 49 | static bool cap_has_inject_ext_dabt; /* KVM_CAP_ARM_INJECT_EXT_DABT; only probed after KVM_CAP_ARM_NISV_TO_USER was enabled in kvm_arch_init() */ |
1a1753f7 | 50 | |
dc40d45e RH |
51 | /** |
52 | * ARMHostCPUFeatures: information about the host CPU (identified | |
53 | * by asking the host kernel) | |
54 | */ | |
55 | typedef struct ARMHostCPUFeatures { | |
56 | ARMISARegisters isar; /* ID register values read from the scratch vCPU */ | |
57 | uint64_t features; /* bitmask built from (1ULL << ARM_FEATURE_xxx) values */ | |
58 | uint32_t target; /* KVM target type reported after initializing the scratch vCPU */ | |
59 | const char *dtb_compatible; /* set to "arm,arm-v8" by the probe; NULL means "not probed yet" to kvm_arm_set_cpu_features_from_host() */ | |
60 | } ARMHostCPUFeatures; | |
61 | ||
c4487d76 PM |
62 | static ARMHostCPUFeatures arm_host_cpu_features; /* probe result shared by every CPU passed to kvm_arm_set_cpu_features_from_host() */ |
63 | ||
5a8a6013 RH |
64 | /** |
65 | * kvm_arm_vcpu_init: | |
bbb22d58 | 66 | * @cpu: ARMCPU |
5a8a6013 RH |
67 | * |
68 | * Initialize (or reinitialize) the VCPU by invoking the | |
69 | * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature | |
70 | * bitmask specified in the CPUState. | |
71 | * | |
72 | * Returns: 0 if success else < 0 error code | |
73 | */ | |
bbb22d58 | 74 | static int kvm_arm_vcpu_init(ARMCPU *cpu) |
228d5e04 | 75 | { |
228d5e04 PS |
76 | struct kvm_vcpu_init init; |
77 | ||
78 | init.target = cpu->kvm_target; | |
79 | memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); | |
80 | ||
bbb22d58 | 81 | return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_INIT, &init); |
228d5e04 PS |
82 | } |
83 | ||
c223c67a RH |
84 | /** |
85 | * kvm_arm_vcpu_finalize: | |
0d31a631 | 86 | * @cpu: ARMCPU |
c223c67a RH |
87 | * @feature: feature to finalize |
88 | * | |
89 | * Finalizes the configuration of the specified VCPU feature by | |
90 | * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring | |
91 | * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of | |
92 | * KVM's API documentation. | |
93 | * | |
94 | * Returns: 0 if success else < 0 error code | |
95 | */ | |
0d31a631 | 96 | static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) |
14e99e0f | 97 | { |
0d31a631 | 98 | return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); |
14e99e0f AJ |
99 | } |
100 | ||
a96c0514 PM |
101 | bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, |
102 | int *fdarray, | |
103 | struct kvm_vcpu_init *init) | |
104 | { | |
0cdb4020 | 105 | int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; |
d26f2f93 | 106 | int max_vm_pa_size; |
a96c0514 | 107 | |
448058aa | 108 | kvmfd = qemu_open_old("/dev/kvm", O_RDWR); |
a96c0514 PM |
109 | if (kvmfd < 0) { |
110 | goto err; | |
111 | } | |
d26f2f93 MZ |
112 | max_vm_pa_size = ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE); |
113 | if (max_vm_pa_size < 0) { | |
114 | max_vm_pa_size = 0; | |
115 | } | |
bbde13cd PM |
116 | do { |
117 | vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); | |
118 | } while (vmfd == -1 && errno == EINTR); | |
a96c0514 PM |
119 | if (vmfd < 0) { |
120 | goto err; | |
121 | } | |
122 | cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); | |
123 | if (cpufd < 0) { | |
124 | goto err; | |
125 | } | |
126 | ||
2f340e9c PX |
127 | if (!init) { |
128 | /* Caller doesn't want the VCPU to be initialized, so skip it */ | |
129 | goto finish; | |
130 | } | |
131 | ||
0cdb4020 AJ |
132 | if (init->target == -1) { |
133 | struct kvm_vcpu_init preferred; | |
134 | ||
135 | ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred); | |
136 | if (!ret) { | |
137 | init->target = preferred.target; | |
138 | } | |
139 | } | |
a96c0514 PM |
140 | if (ret >= 0) { |
141 | ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); | |
142 | if (ret < 0) { | |
143 | goto err; | |
144 | } | |
2f340e9c | 145 | } else if (cpus_to_try) { |
a96c0514 PM |
146 | /* Old kernel which doesn't know about the |
147 | * PREFERRED_TARGET ioctl: we know it will only support | |
148 | * creating one kind of guest CPU which is its preferred | |
149 | * CPU type. | |
150 | */ | |
0cdb4020 AJ |
151 | struct kvm_vcpu_init try; |
152 | ||
a96c0514 | 153 | while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { |
0cdb4020 AJ |
154 | try.target = *cpus_to_try++; |
155 | memcpy(try.features, init->features, sizeof(init->features)); | |
156 | ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try); | |
a96c0514 PM |
157 | if (ret >= 0) { |
158 | break; | |
159 | } | |
160 | } | |
161 | if (ret < 0) { | |
162 | goto err; | |
163 | } | |
0cdb4020 | 164 | init->target = try.target; |
2f340e9c PX |
165 | } else { |
166 | /* Treat a NULL cpus_to_try argument the same as an empty | |
167 | * list, which means we will fail the call since this must | |
168 | * be an old kernel which doesn't support PREFERRED_TARGET. | |
169 | */ | |
170 | goto err; | |
a96c0514 PM |
171 | } |
172 | ||
2f340e9c | 173 | finish: |
a96c0514 PM |
174 | fdarray[0] = kvmfd; |
175 | fdarray[1] = vmfd; | |
176 | fdarray[2] = cpufd; | |
177 | ||
178 | return true; | |
179 | ||
180 | err: | |
181 | if (cpufd >= 0) { | |
182 | close(cpufd); | |
183 | } | |
184 | if (vmfd >= 0) { | |
185 | close(vmfd); | |
186 | } | |
187 | if (kvmfd >= 0) { | |
188 | close(kvmfd); | |
189 | } | |
190 | ||
191 | return false; | |
192 | } | |
193 | ||
/*
 * Close the three descriptors stored in @fdarray, starting with the last
 * entry and finishing with the first.
 */
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int remaining = 3;

    while (remaining-- > 0) {
        close(fdarray[remaining]);
    }
}
202 | ||
dc40d45e RH |
203 | static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) |
204 | { | |
205 | uint64_t ret; | |
206 | struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; | |
207 | int err; | |
208 | ||
209 | assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); | |
210 | err = ioctl(fd, KVM_GET_ONE_REG, &idreg); | |
211 | if (err < 0) { | |
212 | return -1; | |
213 | } | |
214 | *pret = ret; | |
215 | return 0; | |
216 | } | |
217 | ||
218 | static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id) | |
219 | { | |
220 | struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret }; | |
221 | ||
222 | assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); | |
223 | return ioctl(fd, KVM_GET_ONE_REG, &idreg); | |
224 | } | |
225 | ||
226 | static bool kvm_arm_pauth_supported(void) | |
227 | { | |
228 | return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) && | |
229 | kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); | |
230 | } | |
231 | ||
232 | static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) | |
233 | { | |
234 | /* Identify the feature bits corresponding to the host CPU, and | |
235 | * fill out the ARMHostCPUClass fields accordingly. To do this | |
236 | * we have to create a scratch VM, create a single CPU inside it, | |
237 | * and then query that CPU for the relevant ID registers. | |
238 | */ | |
239 | int fdarray[3]; | |
240 | bool sve_supported; | |
241 | bool pmu_supported = false; | |
242 | uint64_t features = 0; | |
243 | int err; | |
244 | ||
245 | /* Old kernels may not know about the PREFERRED_TARGET ioctl: however | |
246 | * we know these will only support creating one kind of guest CPU, | |
247 | * which is its preferred CPU type. Fortunately these old kernels | |
248 | * support only a very limited number of CPUs. | |
249 | */ | |
250 | static const uint32_t cpus_to_try[] = { | |
251 | KVM_ARM_TARGET_AEM_V8, | |
252 | KVM_ARM_TARGET_FOUNDATION_V8, | |
253 | KVM_ARM_TARGET_CORTEX_A57, | |
254 | QEMU_KVM_ARM_TARGET_NONE | |
255 | }; | |
256 | /* | |
257 | * target = -1 informs kvm_arm_create_scratch_host_vcpu() | |
258 | * to use the preferred target | |
259 | */ | |
260 | struct kvm_vcpu_init init = { .target = -1, }; | |
261 | ||
262 | /* | |
263 | * Ask for SVE if supported, so that we can query ID_AA64ZFR0, | |
264 | * which is otherwise RAZ. | |
265 | */ | |
266 | sve_supported = kvm_arm_sve_supported(); | |
267 | if (sve_supported) { | |
268 | init.features[0] |= 1 << KVM_ARM_VCPU_SVE; | |
269 | } | |
270 | ||
271 | /* | |
272 | * Ask for Pointer Authentication if supported, so that we get | |
273 | * the unsanitized field values for AA64ISAR1_EL1. | |
274 | */ | |
275 | if (kvm_arm_pauth_supported()) { | |
276 | init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | | |
277 | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); | |
278 | } | |
279 | ||
280 | if (kvm_arm_pmu_supported()) { | |
281 | init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; | |
282 | pmu_supported = true; | |
283 | } | |
284 | ||
285 | if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { | |
286 | return false; | |
287 | } | |
288 | ||
289 | ahcf->target = init.target; | |
290 | ahcf->dtb_compatible = "arm,arm-v8"; | |
291 | ||
292 | err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, | |
293 | ARM64_SYS_REG(3, 0, 0, 4, 0)); | |
294 | if (unlikely(err < 0)) { | |
295 | /* | |
296 | * Before v4.15, the kernel only exposed a limited number of system | |
297 | * registers, not including any of the interesting AArch64 ID regs. | |
298 | * For the most part we could leave these fields as zero with minimal | |
299 | * effect, since this does not affect the values seen by the guest. | |
300 | * | |
301 | * However, it could cause problems down the line for QEMU, | |
302 | * so provide a minimal v8.0 default. | |
303 | * | |
304 | * ??? Could read MIDR and use knowledge from cpu64.c. | |
305 | * ??? Could map a page of memory into our temp guest and | |
306 | * run the tiniest of hand-crafted kernels to extract | |
307 | * the values seen by the guest. | |
308 | * ??? Either of these sounds like too much effort just | |
309 | * to work around running a modern host kernel. | |
310 | */ | |
311 | ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ | |
312 | err = 0; | |
313 | } else { | |
314 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, | |
315 | ARM64_SYS_REG(3, 0, 0, 4, 1)); | |
316 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, | |
317 | ARM64_SYS_REG(3, 0, 0, 4, 5)); | |
318 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, | |
319 | ARM64_SYS_REG(3, 0, 0, 5, 0)); | |
320 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, | |
321 | ARM64_SYS_REG(3, 0, 0, 5, 1)); | |
322 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, | |
323 | ARM64_SYS_REG(3, 0, 0, 6, 0)); | |
324 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, | |
325 | ARM64_SYS_REG(3, 0, 0, 6, 1)); | |
326 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, | |
327 | ARM64_SYS_REG(3, 0, 0, 6, 2)); | |
328 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, | |
329 | ARM64_SYS_REG(3, 0, 0, 7, 0)); | |
330 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, | |
331 | ARM64_SYS_REG(3, 0, 0, 7, 1)); | |
332 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, | |
333 | ARM64_SYS_REG(3, 0, 0, 7, 2)); | |
334 | ||
335 | /* | |
336 | * Note that if AArch32 support is not present in the host, | |
337 | * the AArch32 sysregs are present to be read, but will | |
338 | * return UNKNOWN values. This is neither better nor worse | |
339 | * than skipping the reads and leaving 0, as we must avoid | |
340 | * considering the values in every case. | |
341 | */ | |
342 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, | |
343 | ARM64_SYS_REG(3, 0, 0, 1, 0)); | |
344 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, | |
345 | ARM64_SYS_REG(3, 0, 0, 1, 1)); | |
346 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, | |
347 | ARM64_SYS_REG(3, 0, 0, 1, 2)); | |
348 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, | |
349 | ARM64_SYS_REG(3, 0, 0, 1, 4)); | |
350 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, | |
351 | ARM64_SYS_REG(3, 0, 0, 1, 5)); | |
352 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, | |
353 | ARM64_SYS_REG(3, 0, 0, 1, 6)); | |
354 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, | |
355 | ARM64_SYS_REG(3, 0, 0, 1, 7)); | |
356 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, | |
357 | ARM64_SYS_REG(3, 0, 0, 2, 0)); | |
358 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, | |
359 | ARM64_SYS_REG(3, 0, 0, 2, 1)); | |
360 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, | |
361 | ARM64_SYS_REG(3, 0, 0, 2, 2)); | |
362 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, | |
363 | ARM64_SYS_REG(3, 0, 0, 2, 3)); | |
364 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, | |
365 | ARM64_SYS_REG(3, 0, 0, 2, 4)); | |
366 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, | |
367 | ARM64_SYS_REG(3, 0, 0, 2, 5)); | |
368 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, | |
369 | ARM64_SYS_REG(3, 0, 0, 2, 6)); | |
370 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, | |
371 | ARM64_SYS_REG(3, 0, 0, 2, 7)); | |
372 | ||
373 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, | |
374 | ARM64_SYS_REG(3, 0, 0, 3, 0)); | |
375 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, | |
376 | ARM64_SYS_REG(3, 0, 0, 3, 1)); | |
377 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, | |
378 | ARM64_SYS_REG(3, 0, 0, 3, 2)); | |
379 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, | |
380 | ARM64_SYS_REG(3, 0, 0, 3, 4)); | |
381 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, | |
382 | ARM64_SYS_REG(3, 0, 0, 3, 5)); | |
383 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, | |
384 | ARM64_SYS_REG(3, 0, 0, 3, 6)); | |
385 | ||
386 | /* | |
387 | * DBGDIDR is a bit complicated because the kernel doesn't | |
388 | * provide an accessor for it in 64-bit mode, which is what this | |
389 | * scratch VM is in, and there's no architected "64-bit sysreg | |
390 | * which reads the same as the 32-bit register" the way there is | |
391 | * for other ID registers. Instead we synthesize a value from the | |
392 | * AArch64 ID_AA64DFR0, the same way the kernel code in | |
393 | * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. | |
394 | * We only do this if the CPU supports AArch32 at EL1. | |
395 | */ | |
396 | if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { | |
397 | int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); | |
398 | int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); | |
399 | int ctx_cmps = | |
400 | FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); | |
401 | int version = 6; /* ARMv8 debug architecture */ | |
402 | bool has_el3 = | |
403 | !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); | |
404 | uint32_t dbgdidr = 0; | |
405 | ||
406 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); | |
407 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); | |
408 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); | |
409 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); | |
410 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); | |
411 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); | |
412 | dbgdidr |= (1 << 15); /* RES1 bit */ | |
413 | ahcf->isar.dbgdidr = dbgdidr; | |
414 | } | |
415 | ||
416 | if (pmu_supported) { | |
417 | /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ | |
418 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, | |
419 | ARM64_SYS_REG(3, 3, 9, 12, 0)); | |
420 | } | |
421 | ||
422 | if (sve_supported) { | |
423 | /* | |
424 | * There is a range of kernels between kernel commit 73433762fcae | |
425 | * and f81cb2c3ad41 which have a bug where the kernel doesn't | |
426 | * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has | |
427 | * enabled SVE support, which resulted in an error rather than RAZ. | |
428 | * So only read the register if we set KVM_ARM_VCPU_SVE above. | |
429 | */ | |
430 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, | |
431 | ARM64_SYS_REG(3, 0, 0, 4, 4)); | |
432 | } | |
433 | } | |
434 | ||
435 | kvm_arm_destroy_scratch_host_vcpu(fdarray); | |
436 | ||
437 | if (err < 0) { | |
438 | return false; | |
439 | } | |
440 | ||
441 | /* | |
442 | * We can assume any KVM supporting CPU is at least a v8 | |
443 | * with VFPv4+Neon; this in turn implies most of the other | |
444 | * feature bits. | |
445 | */ | |
446 | features |= 1ULL << ARM_FEATURE_V8; | |
447 | features |= 1ULL << ARM_FEATURE_NEON; | |
448 | features |= 1ULL << ARM_FEATURE_AARCH64; | |
449 | features |= 1ULL << ARM_FEATURE_PMU; | |
450 | features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; | |
451 | ||
452 | ahcf->features = features; | |
453 | ||
454 | return true; | |
455 | } | |
456 | ||
c4487d76 | 457 | void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) |
a96c0514 | 458 | { |
c4487d76 | 459 | CPUARMState *env = &cpu->env; |
a96c0514 | 460 | |
c4487d76 PM |
461 | if (!arm_host_cpu_features.dtb_compatible) { |
462 | if (!kvm_enabled() || | |
463 | !kvm_arm_get_host_cpu_features(&arm_host_cpu_features)) { | |
464 | /* We can't report this error yet, so flag that we need to | |
465 | * in arm_cpu_realizefn(). | |
466 | */ | |
467 | cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; | |
468 | cpu->host_cpu_probe_failed = true; | |
469 | return; | |
470 | } | |
a96c0514 | 471 | } |
c4487d76 PM |
472 | |
473 | cpu->kvm_target = arm_host_cpu_features.target; | |
474 | cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible; | |
4674097c | 475 | cpu->isar = arm_host_cpu_features.isar; |
c4487d76 | 476 | env->features = arm_host_cpu_features.features; |
a96c0514 PM |
477 | } |
478 | ||
dea101a1 AJ |
479 | static bool kvm_no_adjvtime_get(Object *obj, Error **errp) |
480 | { | |
481 | return !ARM_CPU(obj)->kvm_adjvtime; | |
482 | } | |
483 | ||
484 | static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) | |
485 | { | |
486 | ARM_CPU(obj)->kvm_adjvtime = !value; | |
487 | } | |
488 | ||
68970d1e AJ |
489 | static bool kvm_steal_time_get(Object *obj, Error **errp) |
490 | { | |
491 | return ARM_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF; | |
492 | } | |
493 | ||
494 | static void kvm_steal_time_set(Object *obj, bool value, Error **errp) | |
495 | { | |
496 | ARM_CPU(obj)->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; | |
497 | } | |
498 | ||
dea101a1 | 499 | /* KVM VCPU properties should be prefixed with "kvm-". */ |
cac675b5 | 500 | void kvm_arm_add_vcpu_properties(ARMCPU *cpu) |
dea101a1 | 501 | { |
9e6f8d8a | 502 | CPUARMState *env = &cpu->env; |
cac675b5 | 503 | Object *obj = OBJECT(cpu); |
dea101a1 | 504 | |
9e6f8d8a | 505 | if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { |
506 | cpu->kvm_adjvtime = true; | |
507 | object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, | |
508 | kvm_no_adjvtime_set); | |
509 | object_property_set_description(obj, "kvm-no-adjvtime", | |
510 | "Set on to disable the adjustment of " | |
511 | "the virtual counter. VM stopped time " | |
512 | "will be counted."); | |
513 | } | |
68970d1e AJ |
514 | |
515 | cpu->kvm_steal_time = ON_OFF_AUTO_AUTO; | |
516 | object_property_add_bool(obj, "kvm-steal-time", kvm_steal_time_get, | |
517 | kvm_steal_time_set); | |
518 | object_property_set_description(obj, "kvm-steal-time", | |
519 | "Set off to disable KVM steal time."); | |
dea101a1 AJ |
520 | } |
521 | ||
7d20e681 | 522 | bool kvm_arm_pmu_supported(void) |
ae502508 | 523 | { |
7d20e681 | 524 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3); |
ae502508 AJ |
525 | } |
526 | ||
bcb902a1 | 527 | int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) |
a27382e2 EA |
528 | { |
529 | KVMState *s = KVM_STATE(ms->accelerator); | |
530 | int ret; | |
531 | ||
532 | ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE); | |
bcb902a1 AJ |
533 | *fixed_ipa = ret <= 0; |
534 | ||
a27382e2 EA |
535 | return ret > 0 ? ret : 40; |
536 | } | |
537 | ||
5e0d6590 AO |
538 | int kvm_arch_get_default_type(MachineState *ms) |
539 | { | |
1ab445af AO |
540 | bool fixed_ipa; |
541 | int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); | |
542 | return fixed_ipa ? 0 : size; | |
5e0d6590 AO |
543 | } |
544 | ||
b16565b3 | 545 | int kvm_arch_init(MachineState *ms, KVMState *s) |
494b00c7 | 546 | { |
fff9f555 | 547 | int ret = 0; |
494b00c7 CD |
548 | /* For ARM interrupt delivery is always asynchronous, |
549 | * whether we are using an in-kernel VGIC or not. | |
550 | */ | |
551 | kvm_async_interrupts_allowed = true; | |
a96c0514 | 552 | |
5d721b78 AG |
553 | /* |
554 | * PSCI wakes up secondary cores, so we always need to | |
555 | * have vCPUs waiting in kernel space | |
556 | */ | |
557 | kvm_halt_in_kernel_allowed = true; | |
558 | ||
1a1753f7 AB |
559 | cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); |
560 | ||
51641de4 RH |
561 | /* Check whether user space can specify guest syndrome value */ |
562 | cap_has_inject_serror_esr = | |
563 | kvm_check_extension(s, KVM_CAP_ARM_INJECT_SERROR_ESR); | |
564 | ||
fff9f555 EA |
565 | if (ms->smp.cpus > 256 && |
566 | !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { | |
567 | error_report("Using more than 256 vcpus requires a host kernel " | |
568 | "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); | |
569 | ret = -EINVAL; | |
570 | } | |
571 | ||
694bcaa8 BM |
572 | if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) { |
573 | if (kvm_vm_enable_cap(s, KVM_CAP_ARM_NISV_TO_USER, 0)) { | |
574 | error_report("Failed to enable KVM_CAP_ARM_NISV_TO_USER cap"); | |
575 | } else { | |
576 | /* Set status for supporting the external dabt injection */ | |
577 | cap_has_inject_ext_dabt = kvm_check_extension(s, | |
578 | KVM_CAP_ARM_INJECT_EXT_DABT); | |
579 | } | |
580 | } | |
581 | ||
c8f2eb5d SK |
582 | if (s->kvm_eager_split_size) { |
583 | uint32_t sizes; | |
584 | ||
585 | sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES); | |
586 | if (!sizes) { | |
587 | s->kvm_eager_split_size = 0; | |
588 | warn_report("Eager Page Split support not available"); | |
589 | } else if (!(s->kvm_eager_split_size & sizes)) { | |
590 | error_report("Eager Page Split requested chunk size not valid"); | |
591 | ret = -EINVAL; | |
592 | } else { | |
593 | ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, | |
594 | s->kvm_eager_split_size); | |
595 | if (ret < 0) { | |
596 | error_report("Enabling of Eager Page Split failed: %s", | |
597 | strerror(-ret)); | |
598 | } | |
599 | } | |
600 | } | |
601 | ||
dd2157d2 RH |
602 | max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); |
603 | hw_watchpoints = g_array_sized_new(true, true, | |
604 | sizeof(HWWatchpoint), max_hw_wps); | |
605 | ||
606 | max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); | |
607 | hw_breakpoints = g_array_sized_new(true, true, | |
608 | sizeof(HWBreakpoint), max_hw_bps); | |
ad5c6dde | 609 | |
fff9f555 | 610 | return ret; |
494b00c7 CD |
611 | } |
612 | ||
613 | unsigned long kvm_arch_vcpu_id(CPUState *cpu) | |
614 | { | |
615 | return cpu->cpu_index; | |
616 | } | |
617 | ||
eb035b48 PM |
618 | /* We track all the KVM devices which need their memory addresses |
619 | * passing to the kernel in a list of these structures. | |
620 | * When board init is complete we run through the list and | |
621 | * tell the kernel the base addresses of the memory regions. | |
622 | * We use a MemoryListener to track mapping and unmapping of | |
623 | * the regions during board creation, so the board models don't | |
624 | * need to do anything special for the KVM case. | |
19d1bd0b EA |
625 | * |
626 | * Sometimes the address must be OR'ed with some other fields | |
627 | * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION). | |
628 | * @kda_addr_ormask aims at storing the value of those fields. | |
eb035b48 PM |
629 | */ |
630 | typedef struct KVMDevice { | |
631 | struct kvm_arm_device_addr kda; | |
1da41cc1 | 632 | struct kvm_device_attr kdattr; |
19d1bd0b | 633 | uint64_t kda_addr_ormask; |
eb035b48 PM |
634 | MemoryRegion *mr; |
635 | QSLIST_ENTRY(KVMDevice) entries; | |
1da41cc1 | 636 | int dev_fd; |
eb035b48 PM |
637 | } KVMDevice; |
638 | ||
b58deb34 | 639 | static QSLIST_HEAD(, KVMDevice) kvm_devices_head; |
eb035b48 PM |
640 | |
641 | static void kvm_arm_devlistener_add(MemoryListener *listener, | |
642 | MemoryRegionSection *section) | |
643 | { | |
644 | KVMDevice *kd; | |
645 | ||
646 | QSLIST_FOREACH(kd, &kvm_devices_head, entries) { | |
647 | if (section->mr == kd->mr) { | |
648 | kd->kda.addr = section->offset_within_address_space; | |
649 | } | |
650 | } | |
651 | } | |
652 | ||
653 | static void kvm_arm_devlistener_del(MemoryListener *listener, | |
654 | MemoryRegionSection *section) | |
655 | { | |
656 | KVMDevice *kd; | |
657 | ||
658 | QSLIST_FOREACH(kd, &kvm_devices_head, entries) { | |
659 | if (section->mr == kd->mr) { | |
660 | kd->kda.addr = -1; | |
661 | } | |
662 | } | |
663 | } | |
664 | ||
665 | static MemoryListener devlistener = { /* keeps each tracked KVMDevice's kda.addr in step with region add/del events */ | |
142518bd | 666 | .name = "kvm-arm", |
eb035b48 PM |
667 | .region_add = kvm_arm_devlistener_add, |
668 | .region_del = kvm_arm_devlistener_del, | |
14a868c6 | 669 | .priority = MEMORY_LISTENER_PRIORITY_MIN, |
eb035b48 PM |
670 | }; |
671 | ||
1da41cc1 CD |
672 | static void kvm_arm_set_device_addr(KVMDevice *kd) |
673 | { | |
674 | struct kvm_device_attr *attr = &kd->kdattr; | |
675 | int ret; | |
676 | ||
677 | /* If the device control API is available and we have a device fd on the | |
678 | * KVMDevice struct, let's use the newer API | |
679 | */ | |
680 | if (kd->dev_fd >= 0) { | |
681 | uint64_t addr = kd->kda.addr; | |
19d1bd0b EA |
682 | |
683 | addr |= kd->kda_addr_ormask; | |
1da41cc1 CD |
684 | attr->addr = (uintptr_t)&addr; |
685 | ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); | |
686 | } else { | |
687 | ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); | |
688 | } | |
689 | ||
690 | if (ret < 0) { | |
691 | fprintf(stderr, "Failed to set device address: %s\n", | |
692 | strerror(-ret)); | |
693 | abort(); | |
694 | } | |
695 | } | |
696 | ||
eb035b48 PM |
697 | static void kvm_arm_machine_init_done(Notifier *notifier, void *data) |
698 | { | |
699 | KVMDevice *kd, *tkd; | |
700 | ||
eb035b48 PM |
701 | QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { |
702 | if (kd->kda.addr != -1) { | |
1da41cc1 | 703 | kvm_arm_set_device_addr(kd); |
eb035b48 | 704 | } |
dfde4e6e | 705 | memory_region_unref(kd->mr); |
5ff9aaab | 706 | QSLIST_REMOVE_HEAD(&kvm_devices_head, entries); |
eb035b48 PM |
707 | g_free(kd); |
708 | } | |
0bbe4354 | 709 | memory_listener_unregister(&devlistener); |
eb035b48 PM |
710 | } |
711 | ||
712 | static Notifier notify = { /* registered as a machine-init-done notifier by kvm_arm_register_device() */ | |
713 | .notify = kvm_arm_machine_init_done, | |
714 | }; | |
715 | ||
1da41cc1 | 716 | void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, |
19d1bd0b | 717 | uint64_t attr, int dev_fd, uint64_t addr_ormask) |
eb035b48 PM |
718 | { |
719 | KVMDevice *kd; | |
720 | ||
721 | if (!kvm_irqchip_in_kernel()) { | |
722 | return; | |
723 | } | |
724 | ||
725 | if (QSLIST_EMPTY(&kvm_devices_head)) { | |
4344af65 | 726 | memory_listener_register(&devlistener, &address_space_memory); |
eb035b48 PM |
727 | qemu_add_machine_init_done_notifier(¬ify); |
728 | } | |
729 | kd = g_new0(KVMDevice, 1); | |
730 | kd->mr = mr; | |
731 | kd->kda.id = devid; | |
732 | kd->kda.addr = -1; | |
1da41cc1 CD |
733 | kd->kdattr.flags = 0; |
734 | kd->kdattr.group = group; | |
735 | kd->kdattr.attr = attr; | |
736 | kd->dev_fd = dev_fd; | |
19d1bd0b | 737 | kd->kda_addr_ormask = addr_ormask; |
eb035b48 | 738 | QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); |
dfde4e6e | 739 | memory_region_ref(kd->mr); |
eb035b48 PM |
740 | } |
741 | ||
38df27c8 AB |
/*
 * Three-way comparison of two uint64_t values, usable as a callback for
 * qsort() and bsearch().
 *
 * @a, @b: pointers to the uint64_t values to compare; they are only read.
 *
 * Returns 1 if *a > *b, -1 if *a < *b, and 0 if they are equal. The values
 * are compared rather than subtracted so that differences which don't fit
 * in an int still give the right sign.
 */
static int compare_u64(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
752 | ||
e5ac4200 AJ |
753 | /* |
754 | * cpreg_values are sorted in ascending order by KVM register ID | |
755 | * (see kvm_arm_init_cpreg_list). This allows us to cheaply find | |
756 | * the storage for a KVM register by ID with a binary search. | |
757 | */ | |
758 | static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) | |
759 | { | |
760 | uint64_t *res; | |
761 | ||
762 | res = bsearch(®idx, cpu->cpreg_indexes, cpu->cpreg_array_len, | |
763 | sizeof(uint64_t), compare_u64); | |
764 | assert(res); | |
765 | ||
766 | return &cpu->cpreg_values[res - cpu->cpreg_indexes]; | |
767 | } | |
768 | ||
f38ce925 RH |
769 | /** |
770 | * kvm_arm_reg_syncs_via_cpreg_list: | |
771 | * @regidx: KVM register index | |
772 | * | |
773 | * Return true if this KVM register should be synchronized via the | |
774 | * cpreg list of arbitrary system registers, false if it is synchronized | |
775 | * by hand using code in kvm_arch_get/put_registers(). | |
776 | */ | |
777 | static bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) | |
778 | { | |
779 | switch (regidx & KVM_REG_ARM_COPROC_MASK) { | |
780 | case KVM_REG_ARM_CORE: | |
781 | case KVM_REG_ARM64_SVE: | |
782 | return false; | |
783 | default: | |
784 | return true; | |
785 | } | |
786 | } | |
787 | ||
09ddc012 RH |
788 | /** |
789 | * kvm_arm_init_cpreg_list: | |
790 | * @cpu: ARMCPU | |
791 | * | |
792 | * Initialize the ARMCPU cpreg list according to the kernel's | |
38df27c8 AB |
793 | * definition of what CPU registers it knows about (and throw away |
794 | * the previous TCG-created cpreg list). | |
09ddc012 RH |
795 | * |
796 | * Returns: 0 if success, else < 0 error code | |
38df27c8 | 797 | */ |
09ddc012 | 798 | static int kvm_arm_init_cpreg_list(ARMCPU *cpu) |
38df27c8 AB |
799 | { |
800 | struct kvm_reg_list rl; | |
801 | struct kvm_reg_list *rlp; | |
802 | int i, ret, arraylen; | |
803 | CPUState *cs = CPU(cpu); | |
804 | ||
805 | rl.n = 0; | |
806 | ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl); | |
807 | if (ret != -E2BIG) { | |
808 | return ret; | |
809 | } | |
810 | rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t)); | |
811 | rlp->n = rl.n; | |
812 | ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp); | |
813 | if (ret) { | |
814 | goto out; | |
815 | } | |
816 | /* Sort the list we get back from the kernel, since cpreg_tuples | |
817 | * must be in strictly ascending order. | |
818 | */ | |
819 | qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64); | |
820 | ||
821 | for (i = 0, arraylen = 0; i < rlp->n; i++) { | |
822 | if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) { | |
823 | continue; | |
824 | } | |
825 | switch (rlp->reg[i] & KVM_REG_SIZE_MASK) { | |
826 | case KVM_REG_SIZE_U32: | |
827 | case KVM_REG_SIZE_U64: | |
828 | break; | |
829 | default: | |
830 | fprintf(stderr, "Can't handle size of register in kernel list\n"); | |
831 | ret = -EINVAL; | |
832 | goto out; | |
833 | } | |
834 | ||
835 | arraylen++; | |
836 | } | |
837 | ||
838 | cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen); | |
839 | cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen); | |
840 | cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes, | |
841 | arraylen); | |
842 | cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values, | |
843 | arraylen); | |
844 | cpu->cpreg_array_len = arraylen; | |
845 | cpu->cpreg_vmstate_array_len = arraylen; | |
846 | ||
847 | for (i = 0, arraylen = 0; i < rlp->n; i++) { | |
848 | uint64_t regidx = rlp->reg[i]; | |
849 | if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) { | |
850 | continue; | |
851 | } | |
852 | cpu->cpreg_indexes[arraylen] = regidx; | |
853 | arraylen++; | |
854 | } | |
855 | assert(cpu->cpreg_array_len == arraylen); | |
856 | ||
857 | if (!write_kvmstate_to_list(cpu)) { | |
858 | /* Shouldn't happen unless kernel is inconsistent about | |
859 | * what registers exist. | |
860 | */ | |
861 | fprintf(stderr, "Initial read of kernel register state failed\n"); | |
862 | ret = -EINVAL; | |
863 | goto out; | |
864 | } | |
865 | ||
866 | out: | |
867 | g_free(rlp); | |
868 | return ret; | |
869 | } | |
870 | ||
676fe684 RH |
871 | /** |
872 | * kvm_arm_cpreg_level: | |
873 | * @regidx: KVM register index | |
874 | * | |
875 | * Return the level of this coprocessor/system register. Return value is | |
876 | * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. | |
877 | */ | |
878 | static int kvm_arm_cpreg_level(uint64_t regidx) | |
879 | { | |
880 | /* | |
881 | * All system registers are assumed to be level KVM_PUT_RUNTIME_STATE. | |
882 | * If a register should be written less often, you must add it here | |
883 | * with a state of either KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE. | |
884 | */ | |
885 | switch (regidx) { | |
886 | case KVM_REG_ARM_TIMER_CNT: | |
887 | case KVM_REG_ARM_PTIMER_CNT: | |
888 | return KVM_PUT_FULL_STATE; | |
889 | } | |
890 | return KVM_PUT_RUNTIME_STATE; | |
891 | } | |
892 | ||
ff047453 PM |
893 | bool write_kvmstate_to_list(ARMCPU *cpu) |
894 | { | |
895 | CPUState *cs = CPU(cpu); | |
896 | int i; | |
897 | bool ok = true; | |
898 | ||
899 | for (i = 0; i < cpu->cpreg_array_len; i++) { | |
ff047453 PM |
900 | uint64_t regidx = cpu->cpreg_indexes[i]; |
901 | uint32_t v32; | |
902 | int ret; | |
903 | ||
ff047453 PM |
904 | switch (regidx & KVM_REG_SIZE_MASK) { |
905 | case KVM_REG_SIZE_U32: | |
40d45b85 | 906 | ret = kvm_get_one_reg(cs, regidx, &v32); |
ff047453 PM |
907 | if (!ret) { |
908 | cpu->cpreg_values[i] = v32; | |
909 | } | |
910 | break; | |
911 | case KVM_REG_SIZE_U64: | |
40d45b85 | 912 | ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i); |
ff047453 PM |
913 | break; |
914 | default: | |
d385a605 | 915 | g_assert_not_reached(); |
ff047453 PM |
916 | } |
917 | if (ret) { | |
918 | ok = false; | |
919 | } | |
920 | } | |
921 | return ok; | |
922 | } | |
923 | ||
4b7a6bf4 | 924 | bool write_list_to_kvmstate(ARMCPU *cpu, int level) |
ff047453 PM |
925 | { |
926 | CPUState *cs = CPU(cpu); | |
927 | int i; | |
928 | bool ok = true; | |
929 | ||
930 | for (i = 0; i < cpu->cpreg_array_len; i++) { | |
ff047453 PM |
931 | uint64_t regidx = cpu->cpreg_indexes[i]; |
932 | uint32_t v32; | |
933 | int ret; | |
934 | ||
4b7a6bf4 CD |
935 | if (kvm_arm_cpreg_level(regidx) > level) { |
936 | continue; | |
937 | } | |
938 | ||
ff047453 PM |
939 | switch (regidx & KVM_REG_SIZE_MASK) { |
940 | case KVM_REG_SIZE_U32: | |
941 | v32 = cpu->cpreg_values[i]; | |
6c8b9a74 | 942 | ret = kvm_set_one_reg(cs, regidx, &v32); |
ff047453 PM |
943 | break; |
944 | case KVM_REG_SIZE_U64: | |
6c8b9a74 | 945 | ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i); |
ff047453 PM |
946 | break; |
947 | default: | |
d385a605 | 948 | g_assert_not_reached(); |
ff047453 | 949 | } |
ff047453 PM |
950 | if (ret) { |
951 | /* We might fail for "unknown register" and also for | |
952 | * "you tried to set a register which is constant with | |
953 | * a different value from what it actually contains". | |
954 | */ | |
955 | ok = false; | |
956 | } | |
957 | } | |
958 | return ok; | |
959 | } | |
960 | ||
e5ac4200 AJ |
961 | void kvm_arm_cpu_pre_save(ARMCPU *cpu) |
962 | { | |
963 | /* KVM virtual time adjustment */ | |
964 | if (cpu->kvm_vtime_dirty) { | |
965 | *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; | |
966 | } | |
967 | } | |
968 | ||
969 | void kvm_arm_cpu_post_load(ARMCPU *cpu) | |
970 | { | |
971 | /* KVM virtual time adjustment */ | |
972 | if (cpu->kvm_adjvtime) { | |
973 | cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); | |
974 | cpu->kvm_vtime_dirty = true; | |
975 | } | |
976 | } | |
977 | ||
38df27c8 AB |
978 | void kvm_arm_reset_vcpu(ARMCPU *cpu) |
979 | { | |
25f2895e CD |
980 | int ret; |
981 | ||
38df27c8 AB |
982 | /* Re-init VCPU so that all registers are set to |
983 | * their respective reset values. | |
984 | */ | |
bbb22d58 | 985 | ret = kvm_arm_vcpu_init(cpu); |
25f2895e CD |
986 | if (ret < 0) { |
987 | fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); | |
988 | abort(); | |
989 | } | |
990 | if (!write_kvmstate_to_list(cpu)) { | |
991 | fprintf(stderr, "write_kvmstate_to_list failed\n"); | |
992 | abort(); | |
993 | } | |
b698e4ee PM |
994 | /* |
995 | * Sync the reset values also into the CPUState. This is necessary | |
996 | * because the next thing we do will be a kvm_arch_put_registers() | |
997 | * which will update the list values from the CPUState before copying | |
998 | * the list values back to KVM. It's OK to ignore failure returns here | |
999 | * for the same reason we do so in kvm_arch_get_registers(). | |
1000 | */ | |
1001 | write_list_to_cpustate(cpu); | |
38df27c8 AB |
1002 | } |
1003 | ||
1a1753f7 AB |
1004 | /* |
1005 | * Update KVM's MP_STATE based on what QEMU thinks it is | |
1006 | */ | |
71c34911 | 1007 | static int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) |
1a1753f7 AB |
1008 | { |
1009 | if (cap_has_mp_state) { | |
1010 | struct kvm_mp_state mp_state = { | |
062ba099 AB |
1011 | .mp_state = (cpu->power_state == PSCI_OFF) ? |
1012 | KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE | |
1a1753f7 | 1013 | }; |
71c34911 | 1014 | return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); |
1a1753f7 | 1015 | } |
1a1753f7 AB |
1016 | return 0; |
1017 | } | |
1018 | ||
1019 | /* | |
1020 | * Sync the KVM MP_STATE into QEMU | |
1021 | */ | |
71c34911 | 1022 | static int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) |
1a1753f7 AB |
1023 | { |
1024 | if (cap_has_mp_state) { | |
1025 | struct kvm_mp_state mp_state; | |
1026 | int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state); | |
1027 | if (ret) { | |
71c34911 | 1028 | return ret; |
1a1753f7 | 1029 | } |
062ba099 AB |
1030 | cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ? |
1031 | PSCI_OFF : PSCI_ON; | |
1a1753f7 | 1032 | } |
1a1753f7 AB |
1033 | return 0; |
1034 | } | |
1035 | ||
46512471 RH |
1036 | /** |
1037 | * kvm_arm_get_virtual_time: | |
76acc987 | 1038 | * @cpu: ARMCPU |
46512471 RH |
1039 | * |
1040 | * Gets the VCPU's virtual counter and stores it in the KVM CPU state. | |
1041 | */ | |
76acc987 | 1042 | static void kvm_arm_get_virtual_time(ARMCPU *cpu) |
e5ac4200 | 1043 | { |
e5ac4200 AJ |
1044 | int ret; |
1045 | ||
1046 | if (cpu->kvm_vtime_dirty) { | |
1047 | return; | |
1048 | } | |
1049 | ||
76acc987 | 1050 | ret = kvm_get_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); |
e5ac4200 AJ |
1051 | if (ret) { |
1052 | error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); | |
1053 | abort(); | |
1054 | } | |
1055 | ||
1056 | cpu->kvm_vtime_dirty = true; | |
1057 | } | |
1058 | ||
46512471 RH |
1059 | /** |
1060 | * kvm_arm_put_virtual_time: | |
76acc987 | 1061 | * @cpu: ARMCPU |
46512471 RH |
1062 | * |
1063 | * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. | |
1064 | */ | |
76acc987 | 1065 | static void kvm_arm_put_virtual_time(ARMCPU *cpu) |
e5ac4200 | 1066 | { |
e5ac4200 AJ |
1067 | int ret; |
1068 | ||
1069 | if (!cpu->kvm_vtime_dirty) { | |
1070 | return; | |
1071 | } | |
1072 | ||
76acc987 | 1073 | ret = kvm_set_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); |
e5ac4200 AJ |
1074 | if (ret) { |
1075 | error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); | |
1076 | abort(); | |
1077 | } | |
1078 | ||
1079 | cpu->kvm_vtime_dirty = false; | |
1080 | } | |
1081 | ||
353e03cd RH |
1082 | /** |
1083 | * kvm_put_vcpu_events: | |
1084 | * @cpu: ARMCPU | |
1085 | * | |
1086 | * Put VCPU related state to kvm. | |
1087 | * | |
1088 | * Returns: 0 if success else < 0 error code | |
1089 | */ | |
1090 | static int kvm_put_vcpu_events(ARMCPU *cpu) | |
202ccb6b DG |
1091 | { |
1092 | CPUARMState *env = &cpu->env; | |
1093 | struct kvm_vcpu_events events; | |
1094 | int ret; | |
1095 | ||
1096 | if (!kvm_has_vcpu_events()) { | |
1097 | return 0; | |
1098 | } | |
1099 | ||
1100 | memset(&events, 0, sizeof(events)); | |
1101 | events.exception.serror_pending = env->serror.pending; | |
1102 | ||
1103 | /* Inject SError to guest with specified syndrome if host kernel | |
1104 | * supports it, otherwise inject SError without syndrome. | |
1105 | */ | |
1106 | if (cap_has_inject_serror_esr) { | |
1107 | events.exception.serror_has_esr = env->serror.has_esr; | |
1108 | events.exception.serror_esr = env->serror.esr; | |
1109 | } | |
1110 | ||
1111 | ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); | |
1112 | if (ret) { | |
1113 | error_report("failed to put vcpu events"); | |
1114 | } | |
1115 | ||
1116 | return ret; | |
1117 | } | |
1118 | ||
353e03cd RH |
1119 | /** |
1120 | * kvm_get_vcpu_events: | |
1121 | * @cpu: ARMCPU | |
1122 | * | |
1123 | * Get VCPU related state from kvm. | |
1124 | * | |
1125 | * Returns: 0 if success else < 0 error code | |
1126 | */ | |
1127 | static int kvm_get_vcpu_events(ARMCPU *cpu) | |
202ccb6b DG |
1128 | { |
1129 | CPUARMState *env = &cpu->env; | |
1130 | struct kvm_vcpu_events events; | |
1131 | int ret; | |
1132 | ||
1133 | if (!kvm_has_vcpu_events()) { | |
1134 | return 0; | |
1135 | } | |
1136 | ||
1137 | memset(&events, 0, sizeof(events)); | |
1138 | ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); | |
1139 | if (ret) { | |
1140 | error_report("failed to get vcpu events"); | |
1141 | return ret; | |
1142 | } | |
1143 | ||
1144 | env->serror.pending = events.exception.serror_pending; | |
1145 | env->serror.has_esr = events.exception.serror_has_esr; | |
1146 | env->serror.esr = events.exception.serror_esr; | |
1147 | ||
1148 | return 0; | |
1149 | } | |
1150 | ||
20c83dc9 RH |
#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC, bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 *
 * In the TTBCR.EAE == 0 case only DFSR[10] and DFSR[3:0] contribute to
 * the result: bit 10 is moved down to bit 4 and masked, so that neither
 * DFSR[4] nor any bit above bit 10 can leak into the extracted status.
 */
#define ESR_DFSC(aarch64, lpae, v)                      \
    (((aarch64) || (lpae)) ? ((v) & 0x3F)               \
     : ((((v) >> 6) & 0x10) | ((v) & 0x0F)))

/* Fault status code expected for an external abort in each encoding. */
#define ESR_DFSC_EXTABT(aarch64, lpae) \
    (((aarch64) || (lpae)) ? 0x10 : 0x8)
1171 | ||
1172 | /** | |
1173 | * kvm_arm_verify_ext_dabt_pending: | |
ca0d1b7c | 1174 | * @cpu: ARMCPU |
20c83dc9 RH |
1175 | * |
1176 | * Verify the fault status code wrt the Ext DABT injection | |
1177 | * | |
1178 | * Returns: true if the fault status code is as expected, false otherwise | |
1179 | */ | |
ca0d1b7c | 1180 | static bool kvm_arm_verify_ext_dabt_pending(ARMCPU *cpu) |
20c83dc9 | 1181 | { |
ca0d1b7c | 1182 | CPUState *cs = CPU(cpu); |
20c83dc9 RH |
1183 | uint64_t dfsr_val; |
1184 | ||
1185 | if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) { | |
20c83dc9 RH |
1186 | CPUARMState *env = &cpu->env; |
1187 | int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64); | |
1188 | int lpae = 0; | |
1189 | ||
1190 | if (!aarch64_mode) { | |
1191 | uint64_t ttbcr; | |
1192 | ||
1193 | if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) { | |
1194 | lpae = arm_feature(env, ARM_FEATURE_LPAE) | |
1195 | && (ttbcr & TTBCR_EAE); | |
1196 | } | |
1197 | } | |
1198 | /* | |
1199 | * The verification here is based on the DFSC bits | |
1200 | * of the ESR_EL1 reg only | |
1201 | */ | |
1202 | return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) == | |
1203 | ESR_DFSC_EXTABT(aarch64_mode, lpae)); | |
1204 | } | |
1205 | return false; | |
1206 | } | |
1207 | ||
494b00c7 CD |
1208 | void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) |
1209 | { | |
1711bfa5 BM |
1210 | ARMCPU *cpu = ARM_CPU(cs); |
1211 | CPUARMState *env = &cpu->env; | |
1212 | ||
1213 | if (unlikely(env->ext_dabt_raised)) { | |
1214 | /* | |
1215 | * Verifying that the ext DABT has been properly injected, | |
1216 | * otherwise risking indefinitely re-running the faulting instruction | |
1217 | * Covering a very narrow case for kernels 5.5..5.5.4 | |
1218 | * when injected abort was misconfigured to be | |
1219 | * an IMPLEMENTATION DEFINED exception (for 32-bit EL1) | |
1220 | */ | |
1221 | if (!arm_feature(env, ARM_FEATURE_AARCH64) && | |
ca0d1b7c | 1222 | unlikely(!kvm_arm_verify_ext_dabt_pending(cpu))) { |
1711bfa5 BM |
1223 | |
1224 | error_report("Data abort exception with no valid ISS generated by " | |
1225 | "guest memory access. KVM unable to emulate faulting " | |
1226 | "instruction. Failed to inject an external data abort " | |
1227 | "into the guest."); | |
1228 | abort(); | |
1229 | } | |
1230 | /* Clear the status */ | |
1231 | env->ext_dabt_raised = 0; | |
1232 | } | |
494b00c7 CD |
1233 | } |
1234 | ||
4c663752 | 1235 | MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) |
494b00c7 | 1236 | { |
5d721b78 AG |
1237 | ARMCPU *cpu; |
1238 | uint32_t switched_level; | |
1239 | ||
1240 | if (kvm_irqchip_in_kernel()) { | |
1241 | /* | |
1242 | * We only need to sync timer states with user-space interrupt | |
1243 | * controllers, so return early and save cycles if we don't. | |
1244 | */ | |
1245 | return MEMTXATTRS_UNSPECIFIED; | |
1246 | } | |
1247 | ||
1248 | cpu = ARM_CPU(cs); | |
1249 | ||
1250 | /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */ | |
1251 | if (run->s.regs.device_irq_level != cpu->device_irq_level) { | |
1252 | switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level; | |
1253 | ||
195801d7 | 1254 | bql_lock(); |
5d721b78 AG |
1255 | |
1256 | if (switched_level & KVM_ARM_DEV_EL1_VTIMER) { | |
1257 | qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT], | |
1258 | !!(run->s.regs.device_irq_level & | |
1259 | KVM_ARM_DEV_EL1_VTIMER)); | |
1260 | switched_level &= ~KVM_ARM_DEV_EL1_VTIMER; | |
1261 | } | |
1262 | ||
1263 | if (switched_level & KVM_ARM_DEV_EL1_PTIMER) { | |
1264 | qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS], | |
1265 | !!(run->s.regs.device_irq_level & | |
1266 | KVM_ARM_DEV_EL1_PTIMER)); | |
1267 | switched_level &= ~KVM_ARM_DEV_EL1_PTIMER; | |
1268 | } | |
1269 | ||
b1659527 AJ |
1270 | if (switched_level & KVM_ARM_DEV_PMU) { |
1271 | qemu_set_irq(cpu->pmu_interrupt, | |
1272 | !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU)); | |
1273 | switched_level &= ~KVM_ARM_DEV_PMU; | |
1274 | } | |
5d721b78 AG |
1275 | |
1276 | if (switched_level) { | |
1277 | qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n", | |
1278 | __func__, switched_level); | |
1279 | } | |
1280 | ||
1281 | /* We also mark unknown levels as processed to not waste cycles */ | |
1282 | cpu->device_irq_level = run->s.regs.device_irq_level; | |
195801d7 | 1283 | bql_unlock(); |
5d721b78 AG |
1284 | } |
1285 | ||
4c663752 | 1286 | return MEMTXATTRS_UNSPECIFIED; |
494b00c7 CD |
1287 | } |
1288 | ||
396b6c50 | 1289 | static void kvm_arm_vm_state_change(void *opaque, bool running, RunState state) |
e5ac4200 | 1290 | { |
76acc987 | 1291 | ARMCPU *cpu = opaque; |
e5ac4200 AJ |
1292 | |
1293 | if (running) { | |
1294 | if (cpu->kvm_adjvtime) { | |
76acc987 | 1295 | kvm_arm_put_virtual_time(cpu); |
e5ac4200 AJ |
1296 | } |
1297 | } else { | |
1298 | if (cpu->kvm_adjvtime) { | |
76acc987 | 1299 | kvm_arm_get_virtual_time(cpu); |
e5ac4200 AJ |
1300 | } |
1301 | } | |
1302 | } | |
2ecb2027 | 1303 | |
694bcaa8 BM |
1304 | /** |
1305 | * kvm_arm_handle_dabt_nisv: | |
3187e06a | 1306 | * @cpu: ARMCPU |
694bcaa8 BM |
1307 | * @esr_iss: ISS encoding (limited) for the exception from Data Abort |
1308 | * ISV bit set to '0b0' -> no valid instruction syndrome | |
1309 | * @fault_ipa: faulting address for the synchronous data abort | |
1310 | * | |
1311 | * Returns: 0 if the exception has been handled, < 0 otherwise | |
1312 | */ | |
3187e06a | 1313 | static int kvm_arm_handle_dabt_nisv(ARMCPU *cpu, uint64_t esr_iss, |
694bcaa8 BM |
1314 | uint64_t fault_ipa) |
1315 | { | |
1711bfa5 | 1316 | CPUARMState *env = &cpu->env; |
694bcaa8 BM |
1317 | /* |
1318 | * Request KVM to inject the external data abort into the guest | |
1319 | */ | |
1320 | if (cap_has_inject_ext_dabt) { | |
1321 | struct kvm_vcpu_events events = { }; | |
1322 | /* | |
1323 | * The external data abort event will be handled immediately by KVM | |
1324 | * using the address fault that triggered the exit on given VCPU. | |
1325 | * Requesting injection of the external data abort does not rely | |
1326 | * on any other VCPU state. Therefore, in this particular case, the VCPU | |
1327 | * synchronization can be exceptionally skipped. | |
1328 | */ | |
1329 | events.exception.ext_dabt_pending = 1; | |
1330 | /* KVM_CAP_ARM_INJECT_EXT_DABT implies KVM_CAP_VCPU_EVENTS */ | |
3187e06a | 1331 | if (!kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events)) { |
1711bfa5 BM |
1332 | env->ext_dabt_raised = 1; |
1333 | return 0; | |
1334 | } | |
694bcaa8 BM |
1335 | } else { |
1336 | error_report("Data abort exception triggered by guest memory access " | |
1337 | "at physical address: 0x" TARGET_FMT_lx, | |
1338 | (target_ulong)fault_ipa); | |
1339 | error_printf("KVM unable to emulate faulting instruction.\n"); | |
1340 | } | |
1341 | return -1; | |
1342 | } | |
1343 | ||
5cba8f26 RH |
1344 | /** |
1345 | * kvm_arm_handle_debug: | |
39639275 | 1346 | * @cpu: ARMCPU |
5cba8f26 RH |
1347 | * @debug_exit: debug part of the KVM exit structure |
1348 | * | |
1349 | * Returns: TRUE if the debug exception was handled. | |
1350 | * | |
1351 | * See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register | |
1352 | * | |
1353 | * To minimise translating between kernel and user-space the kernel | |
1354 | * ABI just provides user-space with the full exception syndrome | |
1355 | * register value to be decoded in QEMU. | |
1356 | */ | |
39639275 | 1357 | static bool kvm_arm_handle_debug(ARMCPU *cpu, |
5cba8f26 RH |
1358 | struct kvm_debug_exit_arch *debug_exit) |
1359 | { | |
1360 | int hsr_ec = syn_get_ec(debug_exit->hsr); | |
39639275 | 1361 | CPUState *cs = CPU(cpu); |
5cba8f26 RH |
1362 | CPUARMState *env = &cpu->env; |
1363 | ||
1364 | /* Ensure PC is synchronised */ | |
1365 | kvm_cpu_synchronize_state(cs); | |
1366 | ||
1367 | switch (hsr_ec) { | |
1368 | case EC_SOFTWARESTEP: | |
1369 | if (cs->singlestep_enabled) { | |
1370 | return true; | |
1371 | } else { | |
1372 | /* | |
1373 | * The kernel should have suppressed the guest's ability to | |
1374 | * single step at this point so something has gone wrong. | |
1375 | */ | |
1376 | error_report("%s: guest single-step while debugging unsupported" | |
1377 | " (%"PRIx64", %"PRIx32")", | |
1378 | __func__, env->pc, debug_exit->hsr); | |
1379 | return false; | |
1380 | } | |
1381 | break; | |
1382 | case EC_AA64_BKPT: | |
1383 | if (kvm_find_sw_breakpoint(cs, env->pc)) { | |
1384 | return true; | |
1385 | } | |
1386 | break; | |
1387 | case EC_BREAKPOINT: | |
1388 | if (find_hw_breakpoint(cs, env->pc)) { | |
1389 | return true; | |
1390 | } | |
1391 | break; | |
1392 | case EC_WATCHPOINT: | |
1393 | { | |
1394 | CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far); | |
1395 | if (wp) { | |
1396 | cs->watchpoint_hit = wp; | |
1397 | return true; | |
1398 | } | |
1399 | break; | |
1400 | } | |
1401 | default: | |
1402 | error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")", | |
1403 | __func__, debug_exit->hsr, env->pc); | |
1404 | } | |
1405 | ||
1406 | /* If we are not handling the debug exception it must belong to | |
1407 | * the guest. Let's re-use the existing TCG interrupt code to set | |
1408 | * everything up properly. | |
1409 | */ | |
1410 | cs->exception_index = EXCP_BKPT; | |
1411 | env->exception.syndrome = debug_exit->hsr; | |
1412 | env->exception.vaddress = debug_exit->far; | |
1413 | env->exception.target_el = 1; | |
195801d7 | 1414 | bql_lock(); |
5cba8f26 | 1415 | arm_cpu_do_interrupt(cs); |
195801d7 | 1416 | bql_unlock(); |
5cba8f26 RH |
1417 | |
1418 | return false; | |
1419 | } | |
1420 | ||
494b00c7 CD |
1421 | int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) |
1422 | { | |
3187e06a | 1423 | ARMCPU *cpu = ARM_CPU(cs); |
2ecb2027 AB |
1424 | int ret = 0; |
1425 | ||
1426 | switch (run->exit_reason) { | |
1427 | case KVM_EXIT_DEBUG: | |
39639275 | 1428 | if (kvm_arm_handle_debug(cpu, &run->debug.arch)) { |
2ecb2027 AB |
1429 | ret = EXCP_DEBUG; |
1430 | } /* otherwise return to guest */ | |
1431 | break; | |
694bcaa8 BM |
1432 | case KVM_EXIT_ARM_NISV: |
1433 | /* External DABT with no valid iss to decode */ | |
3187e06a | 1434 | ret = kvm_arm_handle_dabt_nisv(cpu, run->arm_nisv.esr_iss, |
694bcaa8 BM |
1435 | run->arm_nisv.fault_ipa); |
1436 | break; | |
2ecb2027 AB |
1437 | default: |
1438 | qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", | |
1439 | __func__, run->exit_reason); | |
1440 | break; | |
1441 | } | |
1442 | return ret; | |
494b00c7 CD |
1443 | } |
1444 | ||
494b00c7 CD |
1445 | bool kvm_arch_stop_on_emulation_error(CPUState *cs) |
1446 | { | |
1447 | return true; | |
1448 | } | |
1449 | ||
1450 | int kvm_arch_process_async_events(CPUState *cs) | |
1451 | { | |
1452 | return 0; | |
1453 | } | |
1454 | ||
ea79c599 RH |
1455 | /** |
1456 | * kvm_arm_hw_debug_active: | |
366bf10e | 1457 | * @cpu: ARMCPU |
ea79c599 RH |
1458 | * |
1459 | * Return: TRUE if any hardware breakpoints in use. | |
1460 | */ | |
366bf10e | 1461 | static bool kvm_arm_hw_debug_active(ARMCPU *cpu) |
ea79c599 RH |
1462 | { |
1463 | return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); | |
1464 | } | |
1465 | ||
ec4145f7 RH |
1466 | /** |
1467 | * kvm_arm_copy_hw_debug_data: | |
1468 | * @ptr: kvm_guest_debug_arch structure | |
1469 | * | |
1470 | * Copy the architecture specific debug registers into the | |
1471 | * kvm_guest_debug ioctl structure. | |
1472 | */ | |
1473 | static void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr) | |
1474 | { | |
1475 | int i; | |
1476 | memset(ptr, 0, sizeof(struct kvm_guest_debug_arch)); | |
1477 | ||
1478 | for (i = 0; i < max_hw_wps; i++) { | |
1479 | HWWatchpoint *wp = get_hw_wp(i); | |
1480 | ptr->dbg_wcr[i] = wp->wcr; | |
1481 | ptr->dbg_wvr[i] = wp->wvr; | |
1482 | } | |
1483 | for (i = 0; i < max_hw_bps; i++) { | |
1484 | HWBreakpoint *bp = get_hw_bp(i); | |
1485 | ptr->dbg_bcr[i] = bp->bcr; | |
1486 | ptr->dbg_bvr[i] = bp->bvr; | |
1487 | } | |
1488 | } | |
1489 | ||
494b00c7 CD |
1490 | void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) |
1491 | { | |
2ecb2027 AB |
1492 | if (kvm_sw_breakpoints_active(cs)) { |
1493 | dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; | |
1494 | } | |
366bf10e | 1495 | if (kvm_arm_hw_debug_active(ARM_CPU(cs))) { |
e4482ab7 AB |
1496 | dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW; |
1497 | kvm_arm_copy_hw_debug_data(&dbg->arch); | |
1498 | } | |
494b00c7 | 1499 | } |
b3a1c626 AK |
1500 | |
1501 | void kvm_arch_init_irq_routing(KVMState *s) | |
1502 | { | |
1503 | } | |
1da41cc1 | 1504 | |
4376c40d | 1505 | int kvm_arch_irqchip_create(KVMState *s) |
1da41cc1 | 1506 | { |
4376c40d | 1507 | if (kvm_kernel_irqchip_split()) { |
47c182fe | 1508 | error_report("-machine kernel_irqchip=split is not supported on ARM."); |
4376c40d | 1509 | exit(1); |
15eafc2e PB |
1510 | } |
1511 | ||
1da41cc1 CD |
1512 | /* If we can create the VGIC using the newer device control API, we |
1513 | * let the device do this when it initializes itself, otherwise we | |
1514 | * fall back to the old API */ | |
34e85cd9 PF |
1515 | return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); |
1516 | } | |
1da41cc1 | 1517 | |
34e85cd9 PF |
1518 | int kvm_arm_vgic_probe(void) |
1519 | { | |
d45efe47 EA |
1520 | int val = 0; |
1521 | ||
34e85cd9 PF |
1522 | if (kvm_create_device(kvm_state, |
1523 | KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) { | |
d45efe47 EA |
1524 | val |= KVM_ARM_VGIC_V3; |
1525 | } | |
1526 | if (kvm_create_device(kvm_state, | |
1527 | KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) { | |
1528 | val |= KVM_ARM_VGIC_V2; | |
1da41cc1 | 1529 | } |
d45efe47 | 1530 | return val; |
1da41cc1 | 1531 | } |
9e03a040 | 1532 | |
f6530926 EA |
1533 | int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level) |
1534 | { | |
1535 | int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq; | |
1536 | int cpu_idx1 = cpu % 256; | |
1537 | int cpu_idx2 = cpu / 256; | |
1538 | ||
1539 | kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) | | |
1540 | (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT); | |
1541 | ||
1542 | return kvm_set_irq(kvm_state, kvm_irq, !!level); | |
1543 | } | |
1544 | ||
9e03a040 | 1545 | int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, |
dc9f06ca | 1546 | uint64_t address, uint32_t data, PCIDevice *dev) |
9e03a040 | 1547 | { |
b05c81d2 EA |
1548 | AddressSpace *as = pci_device_iommu_address_space(dev); |
1549 | hwaddr xlat, len, doorbell_gpa; | |
1550 | MemoryRegionSection mrs; | |
1551 | MemoryRegion *mr; | |
b05c81d2 EA |
1552 | |
1553 | if (as == &address_space_memory) { | |
1554 | return 0; | |
1555 | } | |
1556 | ||
1557 | /* MSI doorbell address is translated by an IOMMU */ | |
1558 | ||
dfa0d9b8 HM |
1559 | RCU_READ_LOCK_GUARD(); |
1560 | ||
bc6b1cec PM |
1561 | mr = address_space_translate(as, address, &xlat, &len, true, |
1562 | MEMTXATTRS_UNSPECIFIED); | |
dfa0d9b8 | 1563 | |
b05c81d2 | 1564 | if (!mr) { |
dfa0d9b8 | 1565 | return 1; |
b05c81d2 | 1566 | } |
dfa0d9b8 | 1567 | |
b05c81d2 | 1568 | mrs = memory_region_find(mr, xlat, 1); |
dfa0d9b8 | 1569 | |
b05c81d2 | 1570 | if (!mrs.mr) { |
dfa0d9b8 | 1571 | return 1; |
b05c81d2 EA |
1572 | } |
1573 | ||
1574 | doorbell_gpa = mrs.offset_within_address_space; | |
1575 | memory_region_unref(mrs.mr); | |
1576 | ||
1577 | route->u.msi.address_lo = doorbell_gpa; | |
1578 | route->u.msi.address_hi = doorbell_gpa >> 32; | |
1579 | ||
1580 | trace_kvm_arm_fixup_msi_route(address, doorbell_gpa); | |
1581 | ||
dfa0d9b8 | 1582 | return 0; |
9e03a040 | 1583 | } |
1850b6b7 | 1584 | |
38d87493 PX |
1585 | int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, |
1586 | int vector, PCIDevice *dev) | |
1587 | { | |
1588 | return 0; | |
1589 | } | |
1590 | ||
/* Arch hook: no extra work on ARM, always returns 0; @virq is not used. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
1595 | ||
1850b6b7 EA |
/*
 * Convert MSI data to a GSI number: subtract 32 (in unsigned 32-bit
 * arithmetic) and keep the low 16 bits of the result.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t shifted = data - 32;

    return shifted & 0xffff;
}
92a5199b TL |
1600 | |
/* Arch hook: on ARM this unconditionally answers true. */
bool kvm_arch_cpu_check_are_resettable(void)
{
    return true;
}
3dba0a33 | 1605 | |
c8f2eb5d SK |
1606 | static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, |
1607 | const char *name, void *opaque, | |
1608 | Error **errp) | |
1609 | { | |
1610 | KVMState *s = KVM_STATE(obj); | |
1611 | uint64_t value = s->kvm_eager_split_size; | |
1612 | ||
1613 | visit_type_size(v, name, &value, errp); | |
1614 | } | |
1615 | ||
1616 | static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v, | |
1617 | const char *name, void *opaque, | |
1618 | Error **errp) | |
1619 | { | |
1620 | KVMState *s = KVM_STATE(obj); | |
1621 | uint64_t value; | |
1622 | ||
1623 | if (s->fd != -1) { | |
1624 | error_setg(errp, "Unable to set early-split-size after KVM has been initialized"); | |
1625 | return; | |
1626 | } | |
1627 | ||
1628 | if (!visit_type_size(v, name, &value, errp)) { | |
1629 | return; | |
1630 | } | |
1631 | ||
1632 | if (value && !is_power_of_2(value)) { | |
1633 | error_setg(errp, "early-split-size must be a power of two"); | |
1634 | return; | |
1635 | } | |
1636 | ||
1637 | s->kvm_eager_split_size = value; | |
1638 | } | |
1639 | ||
3dba0a33 PB |
1640 | void kvm_arch_accel_class_init(ObjectClass *oc) |
1641 | { | |
c8f2eb5d SK |
1642 | object_class_property_add(oc, "eager-split-size", "size", |
1643 | kvm_arch_get_eager_split_size, | |
1644 | kvm_arch_set_eager_split_size, NULL, NULL); | |
1645 | ||
1646 | object_class_property_set_description(oc, "eager-split-size", | |
1647 | "Eager Page Split chunk size for hugepages. (default: 0, disabled)"); | |
3dba0a33 | 1648 | } |
de3c9601 RH |
1649 | |
1650 | int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) | |
1651 | { | |
1652 | switch (type) { | |
1653 | case GDB_BREAKPOINT_HW: | |
1654 | return insert_hw_breakpoint(addr); | |
1655 | break; | |
1656 | case GDB_WATCHPOINT_READ: | |
1657 | case GDB_WATCHPOINT_WRITE: | |
1658 | case GDB_WATCHPOINT_ACCESS: | |
1659 | return insert_hw_watchpoint(addr, len, type); | |
1660 | default: | |
1661 | return -ENOSYS; | |
1662 | } | |
1663 | } | |
1664 | ||
1665 | int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) | |
1666 | { | |
1667 | switch (type) { | |
1668 | case GDB_BREAKPOINT_HW: | |
1669 | return delete_hw_breakpoint(addr); | |
1670 | case GDB_WATCHPOINT_READ: | |
1671 | case GDB_WATCHPOINT_WRITE: | |
1672 | case GDB_WATCHPOINT_ACCESS: | |
1673 | return delete_hw_watchpoint(addr, len, type); | |
1674 | default: | |
1675 | return -ENOSYS; | |
1676 | } | |
1677 | } | |
1678 | ||
1679 | void kvm_arch_remove_all_hw_breakpoints(void) | |
1680 | { | |
1681 | if (cur_hw_wps > 0) { | |
1682 | g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); | |
1683 | } | |
1684 | if (cur_hw_bps > 0) { | |
1685 | g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); | |
1686 | } | |
1687 | } | |
1688 | ||
e77034f7 | 1689 | static bool kvm_arm_set_device_attr(ARMCPU *cpu, struct kvm_device_attr *attr, |
de3c9601 RH |
1690 | const char *name) |
1691 | { | |
1692 | int err; | |
1693 | ||
e77034f7 | 1694 | err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); |
de3c9601 RH |
1695 | if (err != 0) { |
1696 | error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); | |
1697 | return false; | |
1698 | } | |
1699 | ||
e77034f7 | 1700 | err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr); |
de3c9601 RH |
1701 | if (err != 0) { |
1702 | error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); | |
1703 | return false; | |
1704 | } | |
1705 | ||
1706 | return true; | |
1707 | } | |
1708 | ||
d344f5ba | 1709 | void kvm_arm_pmu_init(ARMCPU *cpu) |
de3c9601 RH |
1710 | { |
1711 | struct kvm_device_attr attr = { | |
1712 | .group = KVM_ARM_VCPU_PMU_V3_CTRL, | |
1713 | .attr = KVM_ARM_VCPU_PMU_V3_INIT, | |
1714 | }; | |
1715 | ||
d344f5ba | 1716 | if (!cpu->has_pmu) { |
de3c9601 RH |
1717 | return; |
1718 | } | |
d344f5ba | 1719 | if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { |
de3c9601 RH |
1720 | error_report("failed to init PMU"); |
1721 | abort(); | |
1722 | } | |
1723 | } | |
1724 | ||
5ed84f3b | 1725 | void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) |
de3c9601 RH |
1726 | { |
1727 | struct kvm_device_attr attr = { | |
1728 | .group = KVM_ARM_VCPU_PMU_V3_CTRL, | |
1729 | .addr = (intptr_t)&irq, | |
1730 | .attr = KVM_ARM_VCPU_PMU_V3_IRQ, | |
1731 | }; | |
1732 | ||
5ed84f3b | 1733 | if (!cpu->has_pmu) { |
de3c9601 RH |
1734 | return; |
1735 | } | |
5ed84f3b | 1736 | if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { |
de3c9601 RH |
1737 | error_report("failed to set irq for PMU"); |
1738 | abort(); | |
1739 | } | |
1740 | } | |
1741 | ||
55503372 | 1742 | void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) |
de3c9601 RH |
1743 | { |
1744 | struct kvm_device_attr attr = { | |
1745 | .group = KVM_ARM_VCPU_PVTIME_CTRL, | |
1746 | .attr = KVM_ARM_VCPU_PVTIME_IPA, | |
1747 | .addr = (uint64_t)&ipa, | |
1748 | }; | |
1749 | ||
55503372 | 1750 | if (cpu->kvm_steal_time == ON_OFF_AUTO_OFF) { |
de3c9601 RH |
1751 | return; |
1752 | } | |
55503372 | 1753 | if (!kvm_arm_set_device_attr(cpu, &attr, "PVTIME IPA")) { |
de3c9601 RH |
1754 | error_report("failed to init PVTIME IPA"); |
1755 | abort(); | |
1756 | } | |
1757 | } | |
1758 | ||
1759 | void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) | |
1760 | { | |
1761 | bool has_steal_time = kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); | |
1762 | ||
1763 | if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) { | |
1764 | if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1765 | cpu->kvm_steal_time = ON_OFF_AUTO_OFF; | |
1766 | } else { | |
1767 | cpu->kvm_steal_time = ON_OFF_AUTO_ON; | |
1768 | } | |
1769 | } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) { | |
1770 | if (!has_steal_time) { | |
1771 | error_setg(errp, "'kvm-steal-time' cannot be enabled " | |
1772 | "on this host"); | |
1773 | return; | |
1774 | } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1775 | /* | |
1776 | * DEN0057A chapter 2 says "This specification only covers | |
1777 | * systems in which the Execution state of the hypervisor | |
1778 | * as well as EL1 of virtual machines is AArch64.". And, | |
1779 | * to ensure that, the smc/hvc calls are only specified as | |
1780 | * smc64/hvc64. | |
1781 | */ | |
1782 | error_setg(errp, "'kvm-steal-time' cannot be enabled " | |
1783 | "for AArch32 guests"); | |
1784 | return; | |
1785 | } | |
1786 | } | |
1787 | } | |
1788 | ||
1789 | bool kvm_arm_aarch32_supported(void) | |
1790 | { | |
1791 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); | |
1792 | } | |
1793 | ||
1794 | bool kvm_arm_sve_supported(void) | |
1795 | { | |
1796 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); | |
1797 | } | |
1798 | ||
1799 | QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); | |
1800 | ||
d6339282 | 1801 | uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) |
de3c9601 RH |
1802 | { |
1803 | /* Only call this function if kvm_arm_sve_supported() returns true. */ | |
1804 | static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; | |
1805 | static bool probed; | |
1806 | uint32_t vq = 0; | |
1807 | int i; | |
1808 | ||
1809 | /* | |
1810 | * KVM ensures all host CPUs support the same set of vector lengths. | |
1811 | * So we only need to create the scratch VCPUs once and then cache | |
1812 | * the results. | |
1813 | */ | |
1814 | if (!probed) { | |
1815 | struct kvm_vcpu_init init = { | |
1816 | .target = -1, | |
1817 | .features[0] = (1 << KVM_ARM_VCPU_SVE), | |
1818 | }; | |
1819 | struct kvm_one_reg reg = { | |
1820 | .id = KVM_REG_ARM64_SVE_VLS, | |
1821 | .addr = (uint64_t)&vls[0], | |
1822 | }; | |
1823 | int fdarray[3], ret; | |
1824 | ||
1825 | probed = true; | |
1826 | ||
1827 | if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) { | |
1828 | error_report("failed to create scratch VCPU with SVE enabled"); | |
1829 | abort(); | |
1830 | } | |
1831 | ret = ioctl(fdarray[2], KVM_GET_ONE_REG, ®); | |
1832 | kvm_arm_destroy_scratch_host_vcpu(fdarray); | |
1833 | if (ret) { | |
1834 | error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s", | |
1835 | strerror(errno)); | |
1836 | abort(); | |
1837 | } | |
1838 | ||
1839 | for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) { | |
1840 | if (vls[i]) { | |
1841 | vq = 64 - clz64(vls[i]) + i * 64; | |
1842 | break; | |
1843 | } | |
1844 | } | |
1845 | if (vq > ARM_MAX_VQ) { | |
1846 | warn_report("KVM supports vector lengths larger than " | |
1847 | "QEMU can enable"); | |
1848 | vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ); | |
1849 | } | |
1850 | } | |
1851 | ||
1852 | return vls[0]; | |
1853 | } | |
1854 | ||
bc1b09b3 | 1855 | static int kvm_arm_sve_set_vls(ARMCPU *cpu) |
de3c9601 | 1856 | { |
de3c9601 RH |
1857 | uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map }; |
1858 | ||
1859 | assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); | |
1860 | ||
bc1b09b3 | 1861 | return kvm_set_one_reg(CPU(cpu), KVM_REG_ARM64_SVE_VLS, &vls[0]); |
de3c9601 RH |
1862 | } |
1863 | ||
1864 | #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 | |
1865 | ||
1866 | int kvm_arch_init_vcpu(CPUState *cs) | |
1867 | { | |
1868 | int ret; | |
1869 | uint64_t mpidr; | |
1870 | ARMCPU *cpu = ARM_CPU(cs); | |
1871 | CPUARMState *env = &cpu->env; | |
1872 | uint64_t psciver; | |
1873 | ||
1874 | if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || | |
1875 | !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { | |
1876 | error_report("KVM is not supported for this guest CPU type"); | |
1877 | return -EINVAL; | |
1878 | } | |
1879 | ||
76acc987 | 1880 | qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cpu); |
de3c9601 RH |
1881 | |
1882 | /* Determine init features for this CPU */ | |
1883 | memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); | |
1884 | if (cs->start_powered_off) { | |
1885 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; | |
1886 | } | |
1887 | if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { | |
1888 | cpu->psci_version = QEMU_PSCI_VERSION_0_2; | |
1889 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; | |
1890 | } | |
1891 | if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1892 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; | |
1893 | } | |
1894 | if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { | |
1895 | cpu->has_pmu = false; | |
1896 | } | |
1897 | if (cpu->has_pmu) { | |
1898 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; | |
1899 | } else { | |
1900 | env->features &= ~(1ULL << ARM_FEATURE_PMU); | |
1901 | } | |
1902 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
1903 | assert(kvm_arm_sve_supported()); | |
1904 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE; | |
1905 | } | |
1906 | if (cpu_isar_feature(aa64_pauth, cpu)) { | |
1907 | cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | | |
1908 | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); | |
1909 | } | |
1910 | ||
1911 | /* Do KVM_ARM_VCPU_INIT ioctl */ | |
bbb22d58 | 1912 | ret = kvm_arm_vcpu_init(cpu); |
de3c9601 RH |
1913 | if (ret) { |
1914 | return ret; | |
1915 | } | |
1916 | ||
1917 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
bc1b09b3 | 1918 | ret = kvm_arm_sve_set_vls(cpu); |
de3c9601 RH |
1919 | if (ret) { |
1920 | return ret; | |
1921 | } | |
0d31a631 | 1922 | ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_SVE); |
de3c9601 RH |
1923 | if (ret) { |
1924 | return ret; | |
1925 | } | |
1926 | } | |
1927 | ||
1928 | /* | |
1929 | * KVM reports the exact PSCI version it is implementing via a | |
1930 | * special sysreg. If it is present, use its contents to determine | |
1931 | * what to report to the guest in the dtb (it is the PSCI version, | |
1932 | * in the same 15-bits major 16-bits minor format that PSCI_VERSION | |
1933 | * returns). | |
1934 | */ | |
1935 | if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) { | |
1936 | cpu->psci_version = psciver; | |
1937 | } | |
1938 | ||
1939 | /* | |
1940 | * When KVM is in use, PSCI is emulated in-kernel and not by qemu. | |
1941 | * Currently KVM has its own idea about MPIDR assignment, so we | |
1942 | * override our defaults with what we get from KVM. | |
1943 | */ | |
1944 | ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); | |
1945 | if (ret) { | |
1946 | return ret; | |
1947 | } | |
1948 | cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK; | |
1949 | ||
de3c9601 RH |
1950 | return kvm_arm_init_cpreg_list(cpu); |
1951 | } | |
1952 | ||
1953 | int kvm_arch_destroy_vcpu(CPUState *cs) | |
1954 | { | |
1955 | return 0; | |
1956 | } | |
1957 | ||
1958 | /* Callers must hold the iothread mutex lock */ | |
1959 | static void kvm_inject_arm_sea(CPUState *c) | |
1960 | { | |
1961 | ARMCPU *cpu = ARM_CPU(c); | |
1962 | CPUARMState *env = &cpu->env; | |
1963 | uint32_t esr; | |
1964 | bool same_el; | |
1965 | ||
1966 | c->exception_index = EXCP_DATA_ABORT; | |
1967 | env->exception.target_el = 1; | |
1968 | ||
1969 | /* | |
1970 | * Set the DFSC to synchronous external abort and set FnV to not valid, | |
1971 | * this will tell guest the FAR_ELx is UNKNOWN for this abort. | |
1972 | */ | |
1973 | same_el = arm_current_el(env) == env->exception.target_el; | |
1974 | esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10); | |
1975 | ||
1976 | env->exception.syndrome = esr; | |
1977 | ||
1978 | arm_cpu_do_interrupt(c); | |
1979 | } | |
1980 | ||
1981 | #define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ | |
1982 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) | |
1983 | ||
1984 | #define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \ | |
1985 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) | |
1986 | ||
1987 | #define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \ | |
1988 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) | |
1989 | ||
1990 | static int kvm_arch_put_fpsimd(CPUState *cs) | |
1991 | { | |
1992 | CPUARMState *env = &ARM_CPU(cs)->env; | |
1993 | int i, ret; | |
1994 | ||
1995 | for (i = 0; i < 32; i++) { | |
1996 | uint64_t *q = aa64_vfp_qreg(env, i); | |
1997 | #if HOST_BIG_ENDIAN | |
1998 | uint64_t fp_val[2] = { q[1], q[0] }; | |
1999 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), | |
2000 | fp_val); | |
2001 | #else | |
2002 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); | |
2003 | #endif | |
2004 | if (ret) { | |
2005 | return ret; | |
2006 | } | |
2007 | } | |
2008 | ||
2009 | return 0; | |
2010 | } | |
2011 | ||
2012 | /* | |
2013 | * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits | |
2014 | * and PREGS and the FFR have a slice size of 256 bits. However we simply hard | |
2015 | * code the slice index to zero for now as it's unlikely we'll need more than | |
2016 | * one slice for quite some time. | |
2017 | */ | |
2018 | static int kvm_arch_put_sve(CPUState *cs) | |
2019 | { | |
2020 | ARMCPU *cpu = ARM_CPU(cs); | |
2021 | CPUARMState *env = &cpu->env; | |
2022 | uint64_t tmp[ARM_MAX_VQ * 2]; | |
2023 | uint64_t *r; | |
2024 | int n, ret; | |
2025 | ||
2026 | for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { | |
2027 | r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2); | |
2028 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); | |
2029 | if (ret) { | |
2030 | return ret; | |
2031 | } | |
2032 | } | |
2033 | ||
2034 | for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { | |
2035 | r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0], | |
2036 | DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2037 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); | |
2038 | if (ret) { | |
2039 | return ret; | |
2040 | } | |
2041 | } | |
2042 | ||
2043 | r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0], | |
2044 | DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2045 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); | |
2046 | if (ret) { | |
2047 | return ret; | |
2048 | } | |
2049 | ||
2050 | return 0; | |
2051 | } | |
2052 | ||
2053 | int kvm_arch_put_registers(CPUState *cs, int level) | |
2054 | { | |
2055 | uint64_t val; | |
2056 | uint32_t fpr; | |
2057 | int i, ret; | |
2058 | unsigned int el; | |
2059 | ||
2060 | ARMCPU *cpu = ARM_CPU(cs); | |
2061 | CPUARMState *env = &cpu->env; | |
2062 | ||
2063 | /* If we are in AArch32 mode then we need to copy the AArch32 regs to the | |
2064 | * AArch64 registers before pushing them out to 64-bit KVM. | |
2065 | */ | |
2066 | if (!is_a64(env)) { | |
2067 | aarch64_sync_32_to_64(env); | |
2068 | } | |
2069 | ||
2070 | for (i = 0; i < 31; i++) { | |
2071 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), | |
2072 | &env->xregs[i]); | |
2073 | if (ret) { | |
2074 | return ret; | |
2075 | } | |
2076 | } | |
2077 | ||
2078 | /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the | |
2079 | * QEMU side we keep the current SP in xregs[31] as well. | |
2080 | */ | |
2081 | aarch64_save_sp(env, 1); | |
2082 | ||
2083 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); | |
2084 | if (ret) { | |
2085 | return ret; | |
2086 | } | |
2087 | ||
2088 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); | |
2089 | if (ret) { | |
2090 | return ret; | |
2091 | } | |
2092 | ||
2093 | /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */ | |
2094 | if (is_a64(env)) { | |
2095 | val = pstate_read(env); | |
2096 | } else { | |
2097 | val = cpsr_read(env); | |
2098 | } | |
2099 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); | |
2100 | if (ret) { | |
2101 | return ret; | |
2102 | } | |
2103 | ||
2104 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); | |
2105 | if (ret) { | |
2106 | return ret; | |
2107 | } | |
2108 | ||
2109 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); | |
2110 | if (ret) { | |
2111 | return ret; | |
2112 | } | |
2113 | ||
2114 | /* Saved Program State Registers | |
2115 | * | |
2116 | * Before we restore from the banked_spsr[] array we need to | |
2117 | * ensure that any modifications to env->spsr are correctly | |
2118 | * reflected in the banks. | |
2119 | */ | |
2120 | el = arm_current_el(env); | |
2121 | if (el > 0 && !is_a64(env)) { | |
2122 | i = bank_number(env->uncached_cpsr & CPSR_M); | |
2123 | env->banked_spsr[i] = env->spsr; | |
2124 | } | |
2125 | ||
2126 | /* KVM 0-4 map to QEMU banks 1-5 */ | |
2127 | for (i = 0; i < KVM_NR_SPSR; i++) { | |
2128 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]), | |
2129 | &env->banked_spsr[i + 1]); | |
2130 | if (ret) { | |
2131 | return ret; | |
2132 | } | |
2133 | } | |
2134 | ||
2135 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
2136 | ret = kvm_arch_put_sve(cs); | |
2137 | } else { | |
2138 | ret = kvm_arch_put_fpsimd(cs); | |
2139 | } | |
2140 | if (ret) { | |
2141 | return ret; | |
2142 | } | |
2143 | ||
2144 | fpr = vfp_get_fpsr(env); | |
2145 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); | |
2146 | if (ret) { | |
2147 | return ret; | |
2148 | } | |
2149 | ||
2150 | fpr = vfp_get_fpcr(env); | |
2151 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); | |
2152 | if (ret) { | |
2153 | return ret; | |
2154 | } | |
2155 | ||
2156 | write_cpustate_to_list(cpu, true); | |
2157 | ||
2158 | if (!write_list_to_kvmstate(cpu, level)) { | |
2159 | return -EINVAL; | |
2160 | } | |
2161 | ||
2162 | /* | |
2163 | * Setting VCPU events should be triggered after syncing the registers | |
2164 | * to avoid overwriting potential changes made by KVM upon calling | |
2165 | * KVM_SET_VCPU_EVENTS ioctl | |
2166 | */ | |
2167 | ret = kvm_put_vcpu_events(cpu); | |
2168 | if (ret) { | |
2169 | return ret; | |
2170 | } | |
2171 | ||
71c34911 | 2172 | return kvm_arm_sync_mpstate_to_kvm(cpu); |
de3c9601 RH |
2173 | } |
2174 | ||
2175 | static int kvm_arch_get_fpsimd(CPUState *cs) | |
2176 | { | |
2177 | CPUARMState *env = &ARM_CPU(cs)->env; | |
2178 | int i, ret; | |
2179 | ||
2180 | for (i = 0; i < 32; i++) { | |
2181 | uint64_t *q = aa64_vfp_qreg(env, i); | |
2182 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); | |
2183 | if (ret) { | |
2184 | return ret; | |
2185 | } else { | |
2186 | #if HOST_BIG_ENDIAN | |
2187 | uint64_t t; | |
2188 | t = q[0], q[0] = q[1], q[1] = t; | |
2189 | #endif | |
2190 | } | |
2191 | } | |
2192 | ||
2193 | return 0; | |
2194 | } | |
2195 | ||
2196 | /* | |
2197 | * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits | |
2198 | * and PREGS and the FFR have a slice size of 256 bits. However we simply hard | |
2199 | * code the slice index to zero for now as it's unlikely we'll need more than | |
2200 | * one slice for quite some time. | |
2201 | */ | |
2202 | static int kvm_arch_get_sve(CPUState *cs) | |
2203 | { | |
2204 | ARMCPU *cpu = ARM_CPU(cs); | |
2205 | CPUARMState *env = &cpu->env; | |
2206 | uint64_t *r; | |
2207 | int n, ret; | |
2208 | ||
2209 | for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { | |
2210 | r = &env->vfp.zregs[n].d[0]; | |
2211 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); | |
2212 | if (ret) { | |
2213 | return ret; | |
2214 | } | |
2215 | sve_bswap64(r, r, cpu->sve_max_vq * 2); | |
2216 | } | |
2217 | ||
2218 | for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { | |
2219 | r = &env->vfp.pregs[n].p[0]; | |
2220 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); | |
2221 | if (ret) { | |
2222 | return ret; | |
2223 | } | |
2224 | sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2225 | } | |
2226 | ||
2227 | r = &env->vfp.pregs[FFR_PRED_NUM].p[0]; | |
2228 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); | |
2229 | if (ret) { | |
2230 | return ret; | |
2231 | } | |
2232 | sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2233 | ||
2234 | return 0; | |
2235 | } | |
2236 | ||
2237 | int kvm_arch_get_registers(CPUState *cs) | |
2238 | { | |
2239 | uint64_t val; | |
2240 | unsigned int el; | |
2241 | uint32_t fpr; | |
2242 | int i, ret; | |
2243 | ||
2244 | ARMCPU *cpu = ARM_CPU(cs); | |
2245 | CPUARMState *env = &cpu->env; | |
2246 | ||
2247 | for (i = 0; i < 31; i++) { | |
2248 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), | |
2249 | &env->xregs[i]); | |
2250 | if (ret) { | |
2251 | return ret; | |
2252 | } | |
2253 | } | |
2254 | ||
2255 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); | |
2256 | if (ret) { | |
2257 | return ret; | |
2258 | } | |
2259 | ||
2260 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); | |
2261 | if (ret) { | |
2262 | return ret; | |
2263 | } | |
2264 | ||
2265 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); | |
2266 | if (ret) { | |
2267 | return ret; | |
2268 | } | |
2269 | ||
2270 | env->aarch64 = ((val & PSTATE_nRW) == 0); | |
2271 | if (is_a64(env)) { | |
2272 | pstate_write(env, val); | |
2273 | } else { | |
2274 | cpsr_write(env, val, 0xffffffff, CPSRWriteRaw); | |
2275 | } | |
2276 | ||
2277 | /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the | |
2278 | * QEMU side we keep the current SP in xregs[31] as well. | |
2279 | */ | |
2280 | aarch64_restore_sp(env, 1); | |
2281 | ||
2282 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); | |
2283 | if (ret) { | |
2284 | return ret; | |
2285 | } | |
2286 | ||
2287 | /* If we are in AArch32 mode then we need to sync the AArch32 regs with the | |
2288 | * incoming AArch64 regs received from 64-bit KVM. | |
2289 | * We must perform this after all of the registers have been acquired from | |
2290 | * the kernel. | |
2291 | */ | |
2292 | if (!is_a64(env)) { | |
2293 | aarch64_sync_64_to_32(env); | |
2294 | } | |
2295 | ||
2296 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); | |
2297 | if (ret) { | |
2298 | return ret; | |
2299 | } | |
2300 | ||
2301 | /* Fetch the SPSR registers | |
2302 | * | |
2303 | * KVM SPSRs 0-4 map to QEMU banks 1-5 | |
2304 | */ | |
2305 | for (i = 0; i < KVM_NR_SPSR; i++) { | |
2306 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]), | |
2307 | &env->banked_spsr[i + 1]); | |
2308 | if (ret) { | |
2309 | return ret; | |
2310 | } | |
2311 | } | |
2312 | ||
2313 | el = arm_current_el(env); | |
2314 | if (el > 0 && !is_a64(env)) { | |
2315 | i = bank_number(env->uncached_cpsr & CPSR_M); | |
2316 | env->spsr = env->banked_spsr[i]; | |
2317 | } | |
2318 | ||
2319 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
2320 | ret = kvm_arch_get_sve(cs); | |
2321 | } else { | |
2322 | ret = kvm_arch_get_fpsimd(cs); | |
2323 | } | |
2324 | if (ret) { | |
2325 | return ret; | |
2326 | } | |
2327 | ||
2328 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); | |
2329 | if (ret) { | |
2330 | return ret; | |
2331 | } | |
2332 | vfp_set_fpsr(env, fpr); | |
2333 | ||
2334 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); | |
2335 | if (ret) { | |
2336 | return ret; | |
2337 | } | |
2338 | vfp_set_fpcr(env, fpr); | |
2339 | ||
2340 | ret = kvm_get_vcpu_events(cpu); | |
2341 | if (ret) { | |
2342 | return ret; | |
2343 | } | |
2344 | ||
2345 | if (!write_kvmstate_to_list(cpu)) { | |
2346 | return -EINVAL; | |
2347 | } | |
2348 | /* Note that it's OK to have registers which aren't in CPUState, | |
2349 | * so we can ignore a failure return here. | |
2350 | */ | |
2351 | write_list_to_cpustate(cpu); | |
2352 | ||
71c34911 | 2353 | ret = kvm_arm_sync_mpstate_to_qemu(cpu); |
de3c9601 RH |
2354 | |
2355 | /* TODO: other registers */ | |
2356 | return ret; | |
2357 | } | |
2358 | ||
2359 | void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) | |
2360 | { | |
2361 | ram_addr_t ram_addr; | |
2362 | hwaddr paddr; | |
2363 | ||
2364 | assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); | |
2365 | ||
2366 | if (acpi_ghes_present() && addr) { | |
2367 | ram_addr = qemu_ram_addr_from_host(addr); | |
2368 | if (ram_addr != RAM_ADDR_INVALID && | |
2369 | kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { | |
2370 | kvm_hwpoison_page_add(ram_addr); | |
2371 | /* | |
2372 | * If this is a BUS_MCEERR_AR, we know we have been called | |
2373 | * synchronously from the vCPU thread, so we can easily | |
2374 | * synchronize the state and inject an error. | |
2375 | * | |
2376 | * TODO: we currently don't tell the guest at all about | |
2377 | * BUS_MCEERR_AO. In that case we might either be being | |
2378 | * called synchronously from the vCPU thread, or a bit | |
2379 | * later from the main thread, so doing the injection of | |
2380 | * the error would be more complicated. | |
2381 | */ | |
2382 | if (code == BUS_MCEERR_AR) { | |
2383 | kvm_cpu_synchronize_state(c); | |
2384 | if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { | |
2385 | kvm_inject_arm_sea(c); | |
2386 | } else { | |
2387 | error_report("failed to record the error"); | |
2388 | abort(); | |
2389 | } | |
2390 | } | |
2391 | return; | |
2392 | } | |
2393 | if (code == BUS_MCEERR_AO) { | |
2394 | error_report("Hardware memory error at addr %p for memory used by " | |
2395 | "QEMU itself instead of guest system!", addr); | |
2396 | } | |
2397 | } | |
2398 | ||
2399 | if (code == BUS_MCEERR_AR) { | |
2400 | error_report("Hardware memory error!"); | |
2401 | exit(1); | |
2402 | } | |
2403 | } | |
2404 | ||
2405 | /* C6.6.29 BRK instruction */ | |
2406 | static const uint32_t brk_insn = 0xd4200000; | |
2407 | ||
2408 | int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) | |
2409 | { | |
2410 | if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || | |
2411 | cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { | |
2412 | return -EINVAL; | |
2413 | } | |
2414 | return 0; | |
2415 | } | |
2416 | ||
2417 | int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) | |
2418 | { | |
2419 | static uint32_t brk; | |
2420 | ||
2421 | if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || | |
2422 | brk != brk_insn || | |
2423 | cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { | |
2424 | return -EINVAL; | |
2425 | } | |
2426 | return 0; | |
2427 | } |