]>
Commit | Line | Data |
---|---|---|
494b00c7 CD |
1 | /* |
2 | * ARM implementation of KVM hooks | |
3 | * | |
4 | * Copyright Christoffer Dall 2009-2010 | |
de3c9601 RH |
5 | * Copyright Mian-M. Hamayun 2013, Virtual Open Systems |
6 | * Copyright Alex Bennée 2014, Linaro | |
494b00c7 CD |
7 | * |
8 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
9 | * See the COPYING file in the top-level directory. | |
10 | * | |
11 | */ | |
12 | ||
74c21bd0 | 13 | #include "qemu/osdep.h" |
494b00c7 | 14 | #include <sys/ioctl.h> |
494b00c7 CD |
15 | |
16 | #include <linux/kvm.h> | |
17 | ||
494b00c7 | 18 | #include "qemu/timer.h" |
2ecb2027 | 19 | #include "qemu/error-report.h" |
db725815 | 20 | #include "qemu/main-loop.h" |
dea101a1 AJ |
21 | #include "qom/object.h" |
22 | #include "qapi/error.h" | |
494b00c7 | 23 | #include "sysemu/sysemu.h" |
de3c9601 | 24 | #include "sysemu/runstate.h" |
494b00c7 | 25 | #include "sysemu/kvm.h" |
a27382e2 | 26 | #include "sysemu/kvm_int.h" |
eb035b48 | 27 | #include "kvm_arm.h" |
494b00c7 | 28 | #include "cpu.h" |
b05c81d2 | 29 | #include "trace.h" |
38df27c8 | 30 | #include "internals.h" |
b05c81d2 | 31 | #include "hw/pci/pci.h" |
4c663752 | 32 | #include "exec/memattrs.h" |
4344af65 | 33 | #include "exec/address-spaces.h" |
de3c9601 | 34 | #include "exec/gdbstub.h" |
15eafc2e | 35 | #include "hw/boards.h" |
64552b6b | 36 | #include "hw/irq.h" |
c8f2eb5d | 37 | #include "qapi/visitor.h" |
03dd024f | 38 | #include "qemu/log.h" |
de3c9601 RH |
39 | #include "hw/acpi/acpi.h" |
40 | #include "hw/acpi/ghes.h" | |
494b00c7 CD |
41 | |
/* Required-capability table; it holds only the KVM_CAP_LAST_INFO entry. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/*
 * Host capability flags.  Each one is assigned in kvm_arch_init() from a
 * kvm_check_extension() query (KVM_CAP_MP_STATE,
 * KVM_CAP_ARM_INJECT_SERROR_ESR and KVM_CAP_ARM_INJECT_EXT_DABT
 * respectively).
 */
static bool cap_has_mp_state;
static bool cap_has_inject_serror_esr;
static bool cap_has_inject_ext_dabt;
1a1753f7 | 49 | |
dc40d45e RH |
/**
 * ARMHostCPUFeatures: information about the host CPU (identified
 * by asking the host kernel)
 * @isar: ID register values read back from a scratch VCPU
 * @features: bitmask built from ARM_FEATURE_* bit numbers
 * @target: KVM target CPU type selected for the host
 * @dtb_compatible: device tree compatible string; a non-NULL value also
 *                  marks the cached copy below as already probed
 */
typedef struct ARMHostCPUFeatures {
    ARMISARegisters isar;
    uint64_t features;
    uint32_t target;
    const char *dtb_compatible;
} ARMHostCPUFeatures;

/* Cached probe result, filled via kvm_arm_set_cpu_features_from_host(). */
static ARMHostCPUFeatures arm_host_cpu_features;
62 | ||
5a8a6013 RH |
63 | /** |
64 | * kvm_arm_vcpu_init: | |
bbb22d58 | 65 | * @cpu: ARMCPU |
5a8a6013 RH |
66 | * |
67 | * Initialize (or reinitialize) the VCPU by invoking the | |
68 | * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature | |
69 | * bitmask specified in the CPUState. | |
70 | * | |
71 | * Returns: 0 if success else < 0 error code | |
72 | */ | |
bbb22d58 | 73 | static int kvm_arm_vcpu_init(ARMCPU *cpu) |
228d5e04 | 74 | { |
228d5e04 PS |
75 | struct kvm_vcpu_init init; |
76 | ||
77 | init.target = cpu->kvm_target; | |
78 | memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); | |
79 | ||
bbb22d58 | 80 | return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_INIT, &init); |
228d5e04 PS |
81 | } |
82 | ||
c223c67a RH |
83 | /** |
84 | * kvm_arm_vcpu_finalize: | |
0d31a631 | 85 | * @cpu: ARMCPU |
c223c67a RH |
86 | * @feature: feature to finalize |
87 | * | |
88 | * Finalizes the configuration of the specified VCPU feature by | |
89 | * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring | |
90 | * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of | |
91 | * KVM's API documentation. | |
92 | * | |
93 | * Returns: 0 if success else < 0 error code | |
94 | */ | |
0d31a631 | 95 | static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) |
14e99e0f | 96 | { |
0d31a631 | 97 | return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); |
14e99e0f AJ |
98 | } |
99 | ||
a96c0514 PM |
100 | bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, |
101 | int *fdarray, | |
102 | struct kvm_vcpu_init *init) | |
103 | { | |
0cdb4020 | 104 | int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; |
d26f2f93 | 105 | int max_vm_pa_size; |
a96c0514 | 106 | |
448058aa | 107 | kvmfd = qemu_open_old("/dev/kvm", O_RDWR); |
a96c0514 PM |
108 | if (kvmfd < 0) { |
109 | goto err; | |
110 | } | |
d26f2f93 MZ |
111 | max_vm_pa_size = ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE); |
112 | if (max_vm_pa_size < 0) { | |
113 | max_vm_pa_size = 0; | |
114 | } | |
bbde13cd PM |
115 | do { |
116 | vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); | |
117 | } while (vmfd == -1 && errno == EINTR); | |
a96c0514 PM |
118 | if (vmfd < 0) { |
119 | goto err; | |
120 | } | |
121 | cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); | |
122 | if (cpufd < 0) { | |
123 | goto err; | |
124 | } | |
125 | ||
2f340e9c PX |
126 | if (!init) { |
127 | /* Caller doesn't want the VCPU to be initialized, so skip it */ | |
128 | goto finish; | |
129 | } | |
130 | ||
0cdb4020 AJ |
131 | if (init->target == -1) { |
132 | struct kvm_vcpu_init preferred; | |
133 | ||
134 | ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred); | |
135 | if (!ret) { | |
136 | init->target = preferred.target; | |
137 | } | |
138 | } | |
a96c0514 PM |
139 | if (ret >= 0) { |
140 | ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); | |
141 | if (ret < 0) { | |
142 | goto err; | |
143 | } | |
2f340e9c | 144 | } else if (cpus_to_try) { |
a96c0514 PM |
145 | /* Old kernel which doesn't know about the |
146 | * PREFERRED_TARGET ioctl: we know it will only support | |
147 | * creating one kind of guest CPU which is its preferred | |
148 | * CPU type. | |
149 | */ | |
0cdb4020 AJ |
150 | struct kvm_vcpu_init try; |
151 | ||
a96c0514 | 152 | while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { |
0cdb4020 AJ |
153 | try.target = *cpus_to_try++; |
154 | memcpy(try.features, init->features, sizeof(init->features)); | |
155 | ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try); | |
a96c0514 PM |
156 | if (ret >= 0) { |
157 | break; | |
158 | } | |
159 | } | |
160 | if (ret < 0) { | |
161 | goto err; | |
162 | } | |
0cdb4020 | 163 | init->target = try.target; |
2f340e9c PX |
164 | } else { |
165 | /* Treat a NULL cpus_to_try argument the same as an empty | |
166 | * list, which means we will fail the call since this must | |
167 | * be an old kernel which doesn't support PREFERRED_TARGET. | |
168 | */ | |
169 | goto err; | |
a96c0514 PM |
170 | } |
171 | ||
2f340e9c | 172 | finish: |
a96c0514 PM |
173 | fdarray[0] = kvmfd; |
174 | fdarray[1] = vmfd; | |
175 | fdarray[2] = cpufd; | |
176 | ||
177 | return true; | |
178 | ||
179 | err: | |
180 | if (cpufd >= 0) { | |
181 | close(cpufd); | |
182 | } | |
183 | if (vmfd >= 0) { | |
184 | close(vmfd); | |
185 | } | |
186 | if (kvmfd >= 0) { | |
187 | close(kvmfd); | |
188 | } | |
189 | ||
190 | return false; | |
191 | } | |
192 | ||
/*
 * Close the three descriptors stored by
 * kvm_arm_create_scratch_host_vcpu(), in reverse order of creation:
 * VCPU, then VM, then /dev/kvm.
 */
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int slot = 3;

    while (slot-- > 0) {
        close(fdarray[slot]);
    }
}
201 | ||
dc40d45e RH |
202 | static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) |
203 | { | |
204 | uint64_t ret; | |
205 | struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; | |
206 | int err; | |
207 | ||
208 | assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); | |
209 | err = ioctl(fd, KVM_GET_ONE_REG, &idreg); | |
210 | if (err < 0) { | |
211 | return -1; | |
212 | } | |
213 | *pret = ret; | |
214 | return 0; | |
215 | } | |
216 | ||
217 | static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id) | |
218 | { | |
219 | struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret }; | |
220 | ||
221 | assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); | |
222 | return ioctl(fd, KVM_GET_ONE_REG, &idreg); | |
223 | } | |
224 | ||
225 | static bool kvm_arm_pauth_supported(void) | |
226 | { | |
227 | return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) && | |
228 | kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); | |
229 | } | |
230 | ||
231 | static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) | |
232 | { | |
233 | /* Identify the feature bits corresponding to the host CPU, and | |
234 | * fill out the ARMHostCPUClass fields accordingly. To do this | |
235 | * we have to create a scratch VM, create a single CPU inside it, | |
236 | * and then query that CPU for the relevant ID registers. | |
237 | */ | |
238 | int fdarray[3]; | |
239 | bool sve_supported; | |
240 | bool pmu_supported = false; | |
241 | uint64_t features = 0; | |
242 | int err; | |
243 | ||
244 | /* Old kernels may not know about the PREFERRED_TARGET ioctl: however | |
245 | * we know these will only support creating one kind of guest CPU, | |
246 | * which is its preferred CPU type. Fortunately these old kernels | |
247 | * support only a very limited number of CPUs. | |
248 | */ | |
249 | static const uint32_t cpus_to_try[] = { | |
250 | KVM_ARM_TARGET_AEM_V8, | |
251 | KVM_ARM_TARGET_FOUNDATION_V8, | |
252 | KVM_ARM_TARGET_CORTEX_A57, | |
253 | QEMU_KVM_ARM_TARGET_NONE | |
254 | }; | |
255 | /* | |
256 | * target = -1 informs kvm_arm_create_scratch_host_vcpu() | |
257 | * to use the preferred target | |
258 | */ | |
259 | struct kvm_vcpu_init init = { .target = -1, }; | |
260 | ||
261 | /* | |
262 | * Ask for SVE if supported, so that we can query ID_AA64ZFR0, | |
263 | * which is otherwise RAZ. | |
264 | */ | |
265 | sve_supported = kvm_arm_sve_supported(); | |
266 | if (sve_supported) { | |
267 | init.features[0] |= 1 << KVM_ARM_VCPU_SVE; | |
268 | } | |
269 | ||
270 | /* | |
271 | * Ask for Pointer Authentication if supported, so that we get | |
272 | * the unsanitized field values for AA64ISAR1_EL1. | |
273 | */ | |
274 | if (kvm_arm_pauth_supported()) { | |
275 | init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | | |
276 | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); | |
277 | } | |
278 | ||
279 | if (kvm_arm_pmu_supported()) { | |
280 | init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; | |
281 | pmu_supported = true; | |
282 | } | |
283 | ||
284 | if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { | |
285 | return false; | |
286 | } | |
287 | ||
288 | ahcf->target = init.target; | |
289 | ahcf->dtb_compatible = "arm,arm-v8"; | |
290 | ||
291 | err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, | |
292 | ARM64_SYS_REG(3, 0, 0, 4, 0)); | |
293 | if (unlikely(err < 0)) { | |
294 | /* | |
295 | * Before v4.15, the kernel only exposed a limited number of system | |
296 | * registers, not including any of the interesting AArch64 ID regs. | |
297 | * For the most part we could leave these fields as zero with minimal | |
298 | * effect, since this does not affect the values seen by the guest. | |
299 | * | |
300 | * However, it could cause problems down the line for QEMU, | |
301 | * so provide a minimal v8.0 default. | |
302 | * | |
303 | * ??? Could read MIDR and use knowledge from cpu64.c. | |
304 | * ??? Could map a page of memory into our temp guest and | |
305 | * run the tiniest of hand-crafted kernels to extract | |
306 | * the values seen by the guest. | |
307 | * ??? Either of these sounds like too much effort just | |
308 | * to work around running a modern host kernel. | |
309 | */ | |
310 | ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ | |
311 | err = 0; | |
312 | } else { | |
313 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, | |
314 | ARM64_SYS_REG(3, 0, 0, 4, 1)); | |
315 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, | |
316 | ARM64_SYS_REG(3, 0, 0, 4, 5)); | |
317 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, | |
318 | ARM64_SYS_REG(3, 0, 0, 5, 0)); | |
319 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, | |
320 | ARM64_SYS_REG(3, 0, 0, 5, 1)); | |
321 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, | |
322 | ARM64_SYS_REG(3, 0, 0, 6, 0)); | |
323 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, | |
324 | ARM64_SYS_REG(3, 0, 0, 6, 1)); | |
325 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, | |
326 | ARM64_SYS_REG(3, 0, 0, 6, 2)); | |
327 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, | |
328 | ARM64_SYS_REG(3, 0, 0, 7, 0)); | |
329 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, | |
330 | ARM64_SYS_REG(3, 0, 0, 7, 1)); | |
331 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, | |
332 | ARM64_SYS_REG(3, 0, 0, 7, 2)); | |
333 | ||
334 | /* | |
335 | * Note that if AArch32 support is not present in the host, | |
336 | * the AArch32 sysregs are present to be read, but will | |
337 | * return UNKNOWN values. This is neither better nor worse | |
338 | * than skipping the reads and leaving 0, as we must avoid | |
339 | * considering the values in every case. | |
340 | */ | |
341 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, | |
342 | ARM64_SYS_REG(3, 0, 0, 1, 0)); | |
343 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, | |
344 | ARM64_SYS_REG(3, 0, 0, 1, 1)); | |
345 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, | |
346 | ARM64_SYS_REG(3, 0, 0, 1, 2)); | |
347 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, | |
348 | ARM64_SYS_REG(3, 0, 0, 1, 4)); | |
349 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, | |
350 | ARM64_SYS_REG(3, 0, 0, 1, 5)); | |
351 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, | |
352 | ARM64_SYS_REG(3, 0, 0, 1, 6)); | |
353 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, | |
354 | ARM64_SYS_REG(3, 0, 0, 1, 7)); | |
355 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, | |
356 | ARM64_SYS_REG(3, 0, 0, 2, 0)); | |
357 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, | |
358 | ARM64_SYS_REG(3, 0, 0, 2, 1)); | |
359 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, | |
360 | ARM64_SYS_REG(3, 0, 0, 2, 2)); | |
361 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, | |
362 | ARM64_SYS_REG(3, 0, 0, 2, 3)); | |
363 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, | |
364 | ARM64_SYS_REG(3, 0, 0, 2, 4)); | |
365 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, | |
366 | ARM64_SYS_REG(3, 0, 0, 2, 5)); | |
367 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, | |
368 | ARM64_SYS_REG(3, 0, 0, 2, 6)); | |
369 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, | |
370 | ARM64_SYS_REG(3, 0, 0, 2, 7)); | |
371 | ||
372 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, | |
373 | ARM64_SYS_REG(3, 0, 0, 3, 0)); | |
374 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, | |
375 | ARM64_SYS_REG(3, 0, 0, 3, 1)); | |
376 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, | |
377 | ARM64_SYS_REG(3, 0, 0, 3, 2)); | |
378 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, | |
379 | ARM64_SYS_REG(3, 0, 0, 3, 4)); | |
380 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, | |
381 | ARM64_SYS_REG(3, 0, 0, 3, 5)); | |
382 | err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, | |
383 | ARM64_SYS_REG(3, 0, 0, 3, 6)); | |
384 | ||
385 | /* | |
386 | * DBGDIDR is a bit complicated because the kernel doesn't | |
387 | * provide an accessor for it in 64-bit mode, which is what this | |
388 | * scratch VM is in, and there's no architected "64-bit sysreg | |
389 | * which reads the same as the 32-bit register" the way there is | |
390 | * for other ID registers. Instead we synthesize a value from the | |
391 | * AArch64 ID_AA64DFR0, the same way the kernel code in | |
392 | * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. | |
393 | * We only do this if the CPU supports AArch32 at EL1. | |
394 | */ | |
395 | if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { | |
396 | int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); | |
397 | int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); | |
398 | int ctx_cmps = | |
399 | FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); | |
400 | int version = 6; /* ARMv8 debug architecture */ | |
401 | bool has_el3 = | |
402 | !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); | |
403 | uint32_t dbgdidr = 0; | |
404 | ||
405 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); | |
406 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); | |
407 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); | |
408 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); | |
409 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); | |
410 | dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); | |
411 | dbgdidr |= (1 << 15); /* RES1 bit */ | |
412 | ahcf->isar.dbgdidr = dbgdidr; | |
413 | } | |
414 | ||
415 | if (pmu_supported) { | |
416 | /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ | |
417 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, | |
418 | ARM64_SYS_REG(3, 3, 9, 12, 0)); | |
419 | } | |
420 | ||
421 | if (sve_supported) { | |
422 | /* | |
423 | * There is a range of kernels between kernel commit 73433762fcae | |
424 | * and f81cb2c3ad41 which have a bug where the kernel doesn't | |
425 | * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has | |
426 | * enabled SVE support, which resulted in an error rather than RAZ. | |
427 | * So only read the register if we set KVM_ARM_VCPU_SVE above. | |
428 | */ | |
429 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, | |
430 | ARM64_SYS_REG(3, 0, 0, 4, 4)); | |
431 | } | |
432 | } | |
433 | ||
434 | kvm_arm_destroy_scratch_host_vcpu(fdarray); | |
435 | ||
436 | if (err < 0) { | |
437 | return false; | |
438 | } | |
439 | ||
440 | /* | |
441 | * We can assume any KVM supporting CPU is at least a v8 | |
442 | * with VFPv4+Neon; this in turn implies most of the other | |
443 | * feature bits. | |
444 | */ | |
445 | features |= 1ULL << ARM_FEATURE_V8; | |
446 | features |= 1ULL << ARM_FEATURE_NEON; | |
447 | features |= 1ULL << ARM_FEATURE_AARCH64; | |
448 | features |= 1ULL << ARM_FEATURE_PMU; | |
449 | features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; | |
450 | ||
451 | ahcf->features = features; | |
452 | ||
453 | return true; | |
454 | } | |
455 | ||
c4487d76 | 456 | void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) |
a96c0514 | 457 | { |
c4487d76 | 458 | CPUARMState *env = &cpu->env; |
a96c0514 | 459 | |
c4487d76 PM |
460 | if (!arm_host_cpu_features.dtb_compatible) { |
461 | if (!kvm_enabled() || | |
462 | !kvm_arm_get_host_cpu_features(&arm_host_cpu_features)) { | |
463 | /* We can't report this error yet, so flag that we need to | |
464 | * in arm_cpu_realizefn(). | |
465 | */ | |
466 | cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; | |
467 | cpu->host_cpu_probe_failed = true; | |
468 | return; | |
469 | } | |
a96c0514 | 470 | } |
c4487d76 PM |
471 | |
472 | cpu->kvm_target = arm_host_cpu_features.target; | |
473 | cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible; | |
4674097c | 474 | cpu->isar = arm_host_cpu_features.isar; |
c4487d76 | 475 | env->features = arm_host_cpu_features.features; |
a96c0514 PM |
476 | } |
477 | ||
dea101a1 AJ |
478 | static bool kvm_no_adjvtime_get(Object *obj, Error **errp) |
479 | { | |
480 | return !ARM_CPU(obj)->kvm_adjvtime; | |
481 | } | |
482 | ||
483 | static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) | |
484 | { | |
485 | ARM_CPU(obj)->kvm_adjvtime = !value; | |
486 | } | |
487 | ||
68970d1e AJ |
488 | static bool kvm_steal_time_get(Object *obj, Error **errp) |
489 | { | |
490 | return ARM_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF; | |
491 | } | |
492 | ||
493 | static void kvm_steal_time_set(Object *obj, bool value, Error **errp) | |
494 | { | |
495 | ARM_CPU(obj)->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; | |
496 | } | |
497 | ||
dea101a1 | 498 | /* KVM VCPU properties should be prefixed with "kvm-". */ |
cac675b5 | 499 | void kvm_arm_add_vcpu_properties(ARMCPU *cpu) |
dea101a1 | 500 | { |
9e6f8d8a | 501 | CPUARMState *env = &cpu->env; |
cac675b5 | 502 | Object *obj = OBJECT(cpu); |
dea101a1 | 503 | |
9e6f8d8a | 504 | if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { |
505 | cpu->kvm_adjvtime = true; | |
506 | object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, | |
507 | kvm_no_adjvtime_set); | |
508 | object_property_set_description(obj, "kvm-no-adjvtime", | |
509 | "Set on to disable the adjustment of " | |
510 | "the virtual counter. VM stopped time " | |
511 | "will be counted."); | |
512 | } | |
68970d1e AJ |
513 | |
514 | cpu->kvm_steal_time = ON_OFF_AUTO_AUTO; | |
515 | object_property_add_bool(obj, "kvm-steal-time", kvm_steal_time_get, | |
516 | kvm_steal_time_set); | |
517 | object_property_set_description(obj, "kvm-steal-time", | |
518 | "Set off to disable KVM steal time."); | |
dea101a1 AJ |
519 | } |
520 | ||
7d20e681 | 521 | bool kvm_arm_pmu_supported(void) |
ae502508 | 522 | { |
7d20e681 | 523 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3); |
ae502508 AJ |
524 | } |
525 | ||
bcb902a1 | 526 | int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) |
a27382e2 EA |
527 | { |
528 | KVMState *s = KVM_STATE(ms->accelerator); | |
529 | int ret; | |
530 | ||
531 | ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE); | |
bcb902a1 AJ |
532 | *fixed_ipa = ret <= 0; |
533 | ||
a27382e2 EA |
534 | return ret > 0 ? ret : 40; |
535 | } | |
536 | ||
5e0d6590 AO |
537 | int kvm_arch_get_default_type(MachineState *ms) |
538 | { | |
1ab445af AO |
539 | bool fixed_ipa; |
540 | int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); | |
541 | return fixed_ipa ? 0 : size; | |
5e0d6590 AO |
542 | } |
543 | ||
b16565b3 | 544 | int kvm_arch_init(MachineState *ms, KVMState *s) |
494b00c7 | 545 | { |
fff9f555 | 546 | int ret = 0; |
494b00c7 CD |
547 | /* For ARM interrupt delivery is always asynchronous, |
548 | * whether we are using an in-kernel VGIC or not. | |
549 | */ | |
550 | kvm_async_interrupts_allowed = true; | |
a96c0514 | 551 | |
5d721b78 AG |
552 | /* |
553 | * PSCI wakes up secondary cores, so we always need to | |
554 | * have vCPUs waiting in kernel space | |
555 | */ | |
556 | kvm_halt_in_kernel_allowed = true; | |
557 | ||
1a1753f7 AB |
558 | cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); |
559 | ||
51641de4 RH |
560 | /* Check whether user space can specify guest syndrome value */ |
561 | cap_has_inject_serror_esr = | |
562 | kvm_check_extension(s, KVM_CAP_ARM_INJECT_SERROR_ESR); | |
563 | ||
fff9f555 EA |
564 | if (ms->smp.cpus > 256 && |
565 | !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { | |
566 | error_report("Using more than 256 vcpus requires a host kernel " | |
567 | "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); | |
568 | ret = -EINVAL; | |
569 | } | |
570 | ||
694bcaa8 BM |
571 | if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) { |
572 | if (kvm_vm_enable_cap(s, KVM_CAP_ARM_NISV_TO_USER, 0)) { | |
573 | error_report("Failed to enable KVM_CAP_ARM_NISV_TO_USER cap"); | |
574 | } else { | |
575 | /* Set status for supporting the external dabt injection */ | |
576 | cap_has_inject_ext_dabt = kvm_check_extension(s, | |
577 | KVM_CAP_ARM_INJECT_EXT_DABT); | |
578 | } | |
579 | } | |
580 | ||
c8f2eb5d SK |
581 | if (s->kvm_eager_split_size) { |
582 | uint32_t sizes; | |
583 | ||
584 | sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES); | |
585 | if (!sizes) { | |
586 | s->kvm_eager_split_size = 0; | |
587 | warn_report("Eager Page Split support not available"); | |
588 | } else if (!(s->kvm_eager_split_size & sizes)) { | |
589 | error_report("Eager Page Split requested chunk size not valid"); | |
590 | ret = -EINVAL; | |
591 | } else { | |
592 | ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, | |
593 | s->kvm_eager_split_size); | |
594 | if (ret < 0) { | |
595 | error_report("Enabling of Eager Page Split failed: %s", | |
596 | strerror(-ret)); | |
597 | } | |
598 | } | |
599 | } | |
600 | ||
dd2157d2 RH |
601 | max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); |
602 | hw_watchpoints = g_array_sized_new(true, true, | |
603 | sizeof(HWWatchpoint), max_hw_wps); | |
604 | ||
605 | max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); | |
606 | hw_breakpoints = g_array_sized_new(true, true, | |
607 | sizeof(HWBreakpoint), max_hw_bps); | |
ad5c6dde | 608 | |
fff9f555 | 609 | return ret; |
494b00c7 CD |
610 | } |
611 | ||
612 | unsigned long kvm_arch_vcpu_id(CPUState *cpu) | |
613 | { | |
614 | return cpu->cpu_index; | |
615 | } | |
616 | ||
eb035b48 PM |
/* We track all the KVM devices which need their memory addresses
 * passing to the kernel in a list of these structures.
 * When board init is complete we run through the list and
 * tell the kernel the base addresses of the memory regions.
 * We use a MemoryListener to track mapping and unmapping of
 * the regions during board creation, so the board models don't
 * need to do anything special for the KVM case.
 *
 * Sometimes the address must be OR'ed with some other fields
 * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION).
 * @kda_addr_ormask aims at storing the value of those fields.
 *
 * @kda: id/address pair; kda.addr is -1 while the region is unmapped
 * @kdattr: attribute used when the address is set through @dev_fd
 * @mr: the tracked memory region (a reference is held while listed)
 * @dev_fd: device fd; when >= 0 the address is set with
 *          KVM_SET_DEVICE_ATTR instead of KVM_ARM_SET_DEVICE_ADDR
 */
typedef struct KVMDevice {
    struct kvm_arm_device_addr kda;
    struct kvm_device_attr kdattr;
    uint64_t kda_addr_ormask;
    MemoryRegion *mr;
    QSLIST_ENTRY(KVMDevice) entries;
    int dev_fd;
} KVMDevice;

/* Devices registered through kvm_arm_register_device(). */
static QSLIST_HEAD(, KVMDevice) kvm_devices_head;
eb035b48 PM |
639 | |
640 | static void kvm_arm_devlistener_add(MemoryListener *listener, | |
641 | MemoryRegionSection *section) | |
642 | { | |
643 | KVMDevice *kd; | |
644 | ||
645 | QSLIST_FOREACH(kd, &kvm_devices_head, entries) { | |
646 | if (section->mr == kd->mr) { | |
647 | kd->kda.addr = section->offset_within_address_space; | |
648 | } | |
649 | } | |
650 | } | |
651 | ||
652 | static void kvm_arm_devlistener_del(MemoryListener *listener, | |
653 | MemoryRegionSection *section) | |
654 | { | |
655 | KVMDevice *kd; | |
656 | ||
657 | QSLIST_FOREACH(kd, &kvm_devices_head, entries) { | |
658 | if (section->mr == kd->mr) { | |
659 | kd->kda.addr = -1; | |
660 | } | |
661 | } | |
662 | } | |
663 | ||
/*
 * Listener that keeps the addresses of the tracked KVM devices up to date.
 * It is registered by kvm_arm_register_device() and unregistered again in
 * kvm_arm_machine_init_done().
 */
static MemoryListener devlistener = {
    .name = "kvm-arm",
    .region_add = kvm_arm_devlistener_add,
    .region_del = kvm_arm_devlistener_del,
    .priority = MEMORY_LISTENER_PRIORITY_MIN,
};
670 | ||
1da41cc1 CD |
671 | static void kvm_arm_set_device_addr(KVMDevice *kd) |
672 | { | |
673 | struct kvm_device_attr *attr = &kd->kdattr; | |
674 | int ret; | |
675 | ||
676 | /* If the device control API is available and we have a device fd on the | |
677 | * KVMDevice struct, let's use the newer API | |
678 | */ | |
679 | if (kd->dev_fd >= 0) { | |
680 | uint64_t addr = kd->kda.addr; | |
19d1bd0b EA |
681 | |
682 | addr |= kd->kda_addr_ormask; | |
1da41cc1 CD |
683 | attr->addr = (uintptr_t)&addr; |
684 | ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); | |
685 | } else { | |
686 | ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); | |
687 | } | |
688 | ||
689 | if (ret < 0) { | |
690 | fprintf(stderr, "Failed to set device address: %s\n", | |
691 | strerror(-ret)); | |
692 | abort(); | |
693 | } | |
694 | } | |
695 | ||
eb035b48 PM |
696 | static void kvm_arm_machine_init_done(Notifier *notifier, void *data) |
697 | { | |
698 | KVMDevice *kd, *tkd; | |
699 | ||
eb035b48 PM |
700 | QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { |
701 | if (kd->kda.addr != -1) { | |
1da41cc1 | 702 | kvm_arm_set_device_addr(kd); |
eb035b48 | 703 | } |
dfde4e6e | 704 | memory_region_unref(kd->mr); |
5ff9aaab | 705 | QSLIST_REMOVE_HEAD(&kvm_devices_head, entries); |
eb035b48 PM |
706 | g_free(kd); |
707 | } | |
0bbe4354 | 708 | memory_listener_unregister(&devlistener); |
eb035b48 PM |
709 | } |
710 | ||
/* Notifier that runs kvm_arm_machine_init_done(). */
static Notifier notify = {
    .notify = kvm_arm_machine_init_done,
};
714 | ||
1da41cc1 | 715 | void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, |
19d1bd0b | 716 | uint64_t attr, int dev_fd, uint64_t addr_ormask) |
eb035b48 PM |
717 | { |
718 | KVMDevice *kd; | |
719 | ||
720 | if (!kvm_irqchip_in_kernel()) { | |
721 | return; | |
722 | } | |
723 | ||
724 | if (QSLIST_EMPTY(&kvm_devices_head)) { | |
4344af65 | 725 | memory_listener_register(&devlistener, &address_space_memory); |
eb035b48 PM |
726 | qemu_add_machine_init_done_notifier(¬ify); |
727 | } | |
728 | kd = g_new0(KVMDevice, 1); | |
729 | kd->mr = mr; | |
730 | kd->kda.id = devid; | |
731 | kd->kda.addr = -1; | |
1da41cc1 CD |
732 | kd->kdattr.flags = 0; |
733 | kd->kdattr.group = group; | |
734 | kd->kdattr.attr = attr; | |
735 | kd->dev_fd = dev_fd; | |
19d1bd0b | 736 | kd->kda_addr_ormask = addr_ormask; |
eb035b48 | 737 | QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); |
dfde4e6e | 738 | memory_region_ref(kd->mr); |
eb035b48 PM |
739 | } |
740 | ||
38df27c8 AB |
/*
 * Three-way comparison of two uint64_t values, usable with qsort() and
 * bsearch().  Returns 1 if *a > *b, -1 if *a < *b and 0 if they are equal.
 */
static int compare_u64(const void *a, const void *b)
{
    /* Keep the const qualifier instead of casting it away. */
    const uint64_t *lhs = a;
    const uint64_t *rhs = b;

    if (*lhs > *rhs) {
        return 1;
    }
    if (*lhs < *rhs) {
        return -1;
    }
    return 0;
}
751 | ||
e5ac4200 AJ |
752 | /* |
753 | * cpreg_values are sorted in ascending order by KVM register ID | |
754 | * (see kvm_arm_init_cpreg_list). This allows us to cheaply find | |
755 | * the storage for a KVM register by ID with a binary search. | |
756 | */ | |
757 | static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) | |
758 | { | |
759 | uint64_t *res; | |
760 | ||
761 | res = bsearch(®idx, cpu->cpreg_indexes, cpu->cpreg_array_len, | |
762 | sizeof(uint64_t), compare_u64); | |
763 | assert(res); | |
764 | ||
765 | return &cpu->cpreg_values[res - cpu->cpreg_indexes]; | |
766 | } | |
767 | ||
f38ce925 RH |
768 | /** |
769 | * kvm_arm_reg_syncs_via_cpreg_list: | |
770 | * @regidx: KVM register index | |
771 | * | |
772 | * Return true if this KVM register should be synchronized via the | |
773 | * cpreg list of arbitrary system registers, false if it is synchronized | |
774 | * by hand using code in kvm_arch_get/put_registers(). | |
775 | */ | |
776 | static bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) | |
777 | { | |
778 | switch (regidx & KVM_REG_ARM_COPROC_MASK) { | |
779 | case KVM_REG_ARM_CORE: | |
780 | case KVM_REG_ARM64_SVE: | |
781 | return false; | |
782 | default: | |
783 | return true; | |
784 | } | |
785 | } | |
786 | ||
09ddc012 RH |
787 | /** |
788 | * kvm_arm_init_cpreg_list: | |
789 | * @cpu: ARMCPU | |
790 | * | |
791 | * Initialize the ARMCPU cpreg list according to the kernel's | |
38df27c8 AB |
792 | * definition of what CPU registers it knows about (and throw away |
793 | * the previous TCG-created cpreg list). | |
09ddc012 RH |
794 | * |
795 | * Returns: 0 if success, else < 0 error code | |
38df27c8 | 796 | */ |
09ddc012 | 797 | static int kvm_arm_init_cpreg_list(ARMCPU *cpu) |
38df27c8 AB |
798 | { |
799 | struct kvm_reg_list rl; | |
800 | struct kvm_reg_list *rlp; | |
801 | int i, ret, arraylen; | |
802 | CPUState *cs = CPU(cpu); | |
803 | ||
804 | rl.n = 0; | |
805 | ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl); | |
806 | if (ret != -E2BIG) { | |
807 | return ret; | |
808 | } | |
809 | rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t)); | |
810 | rlp->n = rl.n; | |
811 | ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp); | |
812 | if (ret) { | |
813 | goto out; | |
814 | } | |
815 | /* Sort the list we get back from the kernel, since cpreg_tuples | |
816 | * must be in strictly ascending order. | |
817 | */ | |
818 | qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64); | |
819 | ||
820 | for (i = 0, arraylen = 0; i < rlp->n; i++) { | |
821 | if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) { | |
822 | continue; | |
823 | } | |
824 | switch (rlp->reg[i] & KVM_REG_SIZE_MASK) { | |
825 | case KVM_REG_SIZE_U32: | |
826 | case KVM_REG_SIZE_U64: | |
827 | break; | |
828 | default: | |
829 | fprintf(stderr, "Can't handle size of register in kernel list\n"); | |
830 | ret = -EINVAL; | |
831 | goto out; | |
832 | } | |
833 | ||
834 | arraylen++; | |
835 | } | |
836 | ||
837 | cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen); | |
838 | cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen); | |
839 | cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes, | |
840 | arraylen); | |
841 | cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values, | |
842 | arraylen); | |
843 | cpu->cpreg_array_len = arraylen; | |
844 | cpu->cpreg_vmstate_array_len = arraylen; | |
845 | ||
846 | for (i = 0, arraylen = 0; i < rlp->n; i++) { | |
847 | uint64_t regidx = rlp->reg[i]; | |
848 | if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) { | |
849 | continue; | |
850 | } | |
851 | cpu->cpreg_indexes[arraylen] = regidx; | |
852 | arraylen++; | |
853 | } | |
854 | assert(cpu->cpreg_array_len == arraylen); | |
855 | ||
856 | if (!write_kvmstate_to_list(cpu)) { | |
857 | /* Shouldn't happen unless kernel is inconsistent about | |
858 | * what registers exist. | |
859 | */ | |
860 | fprintf(stderr, "Initial read of kernel register state failed\n"); | |
861 | ret = -EINVAL; | |
862 | goto out; | |
863 | } | |
864 | ||
865 | out: | |
866 | g_free(rlp); | |
867 | return ret; | |
868 | } | |
869 | ||
676fe684 RH |
870 | /** |
871 | * kvm_arm_cpreg_level: | |
872 | * @regidx: KVM register index | |
873 | * | |
874 | * Return the level of this coprocessor/system register. Return value is | |
875 | * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. | |
876 | */ | |
877 | static int kvm_arm_cpreg_level(uint64_t regidx) | |
878 | { | |
879 | /* | |
880 | * All system registers are assumed to be level KVM_PUT_RUNTIME_STATE. | |
881 | * If a register should be written less often, you must add it here | |
882 | * with a state of either KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE. | |
883 | */ | |
884 | switch (regidx) { | |
885 | case KVM_REG_ARM_TIMER_CNT: | |
886 | case KVM_REG_ARM_PTIMER_CNT: | |
887 | return KVM_PUT_FULL_STATE; | |
888 | } | |
889 | return KVM_PUT_RUNTIME_STATE; | |
890 | } | |
891 | ||
ff047453 PM |
892 | bool write_kvmstate_to_list(ARMCPU *cpu) |
893 | { | |
894 | CPUState *cs = CPU(cpu); | |
895 | int i; | |
896 | bool ok = true; | |
897 | ||
898 | for (i = 0; i < cpu->cpreg_array_len; i++) { | |
ff047453 PM |
899 | uint64_t regidx = cpu->cpreg_indexes[i]; |
900 | uint32_t v32; | |
901 | int ret; | |
902 | ||
ff047453 PM |
903 | switch (regidx & KVM_REG_SIZE_MASK) { |
904 | case KVM_REG_SIZE_U32: | |
40d45b85 | 905 | ret = kvm_get_one_reg(cs, regidx, &v32); |
ff047453 PM |
906 | if (!ret) { |
907 | cpu->cpreg_values[i] = v32; | |
908 | } | |
909 | break; | |
910 | case KVM_REG_SIZE_U64: | |
40d45b85 | 911 | ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i); |
ff047453 PM |
912 | break; |
913 | default: | |
d385a605 | 914 | g_assert_not_reached(); |
ff047453 PM |
915 | } |
916 | if (ret) { | |
917 | ok = false; | |
918 | } | |
919 | } | |
920 | return ok; | |
921 | } | |
922 | ||
4b7a6bf4 | 923 | bool write_list_to_kvmstate(ARMCPU *cpu, int level) |
ff047453 PM |
924 | { |
925 | CPUState *cs = CPU(cpu); | |
926 | int i; | |
927 | bool ok = true; | |
928 | ||
929 | for (i = 0; i < cpu->cpreg_array_len; i++) { | |
ff047453 PM |
930 | uint64_t regidx = cpu->cpreg_indexes[i]; |
931 | uint32_t v32; | |
932 | int ret; | |
933 | ||
4b7a6bf4 CD |
934 | if (kvm_arm_cpreg_level(regidx) > level) { |
935 | continue; | |
936 | } | |
937 | ||
ff047453 PM |
938 | switch (regidx & KVM_REG_SIZE_MASK) { |
939 | case KVM_REG_SIZE_U32: | |
940 | v32 = cpu->cpreg_values[i]; | |
6c8b9a74 | 941 | ret = kvm_set_one_reg(cs, regidx, &v32); |
ff047453 PM |
942 | break; |
943 | case KVM_REG_SIZE_U64: | |
6c8b9a74 | 944 | ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i); |
ff047453 PM |
945 | break; |
946 | default: | |
d385a605 | 947 | g_assert_not_reached(); |
ff047453 | 948 | } |
ff047453 PM |
949 | if (ret) { |
950 | /* We might fail for "unknown register" and also for | |
951 | * "you tried to set a register which is constant with | |
952 | * a different value from what it actually contains". | |
953 | */ | |
954 | ok = false; | |
955 | } | |
956 | } | |
957 | return ok; | |
958 | } | |
959 | ||
e5ac4200 AJ |
960 | void kvm_arm_cpu_pre_save(ARMCPU *cpu) |
961 | { | |
962 | /* KVM virtual time adjustment */ | |
963 | if (cpu->kvm_vtime_dirty) { | |
964 | *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; | |
965 | } | |
966 | } | |
967 | ||
968 | void kvm_arm_cpu_post_load(ARMCPU *cpu) | |
969 | { | |
970 | /* KVM virtual time adjustment */ | |
971 | if (cpu->kvm_adjvtime) { | |
972 | cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); | |
973 | cpu->kvm_vtime_dirty = true; | |
974 | } | |
975 | } | |
976 | ||
38df27c8 AB |
977 | void kvm_arm_reset_vcpu(ARMCPU *cpu) |
978 | { | |
25f2895e CD |
979 | int ret; |
980 | ||
38df27c8 AB |
981 | /* Re-init VCPU so that all registers are set to |
982 | * their respective reset values. | |
983 | */ | |
bbb22d58 | 984 | ret = kvm_arm_vcpu_init(cpu); |
25f2895e CD |
985 | if (ret < 0) { |
986 | fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); | |
987 | abort(); | |
988 | } | |
989 | if (!write_kvmstate_to_list(cpu)) { | |
990 | fprintf(stderr, "write_kvmstate_to_list failed\n"); | |
991 | abort(); | |
992 | } | |
b698e4ee PM |
993 | /* |
994 | * Sync the reset values also into the CPUState. This is necessary | |
995 | * because the next thing we do will be a kvm_arch_put_registers() | |
996 | * which will update the list values from the CPUState before copying | |
997 | * the list values back to KVM. It's OK to ignore failure returns here | |
998 | * for the same reason we do so in kvm_arch_get_registers(). | |
999 | */ | |
1000 | write_list_to_cpustate(cpu); | |
38df27c8 AB |
1001 | } |
1002 | ||
1a1753f7 AB |
1003 | /* |
1004 | * Update KVM's MP_STATE based on what QEMU thinks it is | |
1005 | */ | |
71c34911 | 1006 | static int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) |
1a1753f7 AB |
1007 | { |
1008 | if (cap_has_mp_state) { | |
1009 | struct kvm_mp_state mp_state = { | |
062ba099 AB |
1010 | .mp_state = (cpu->power_state == PSCI_OFF) ? |
1011 | KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE | |
1a1753f7 | 1012 | }; |
71c34911 | 1013 | return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); |
1a1753f7 | 1014 | } |
1a1753f7 AB |
1015 | return 0; |
1016 | } | |
1017 | ||
1018 | /* | |
1019 | * Sync the KVM MP_STATE into QEMU | |
1020 | */ | |
71c34911 | 1021 | static int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) |
1a1753f7 AB |
1022 | { |
1023 | if (cap_has_mp_state) { | |
1024 | struct kvm_mp_state mp_state; | |
1025 | int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state); | |
1026 | if (ret) { | |
71c34911 | 1027 | return ret; |
1a1753f7 | 1028 | } |
062ba099 AB |
1029 | cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ? |
1030 | PSCI_OFF : PSCI_ON; | |
1a1753f7 | 1031 | } |
1a1753f7 AB |
1032 | return 0; |
1033 | } | |
1034 | ||
46512471 RH |
1035 | /** |
1036 | * kvm_arm_get_virtual_time: | |
76acc987 | 1037 | * @cpu: ARMCPU |
46512471 RH |
1038 | * |
1039 | * Gets the VCPU's virtual counter and stores it in the KVM CPU state. | |
1040 | */ | |
76acc987 | 1041 | static void kvm_arm_get_virtual_time(ARMCPU *cpu) |
e5ac4200 | 1042 | { |
e5ac4200 AJ |
1043 | int ret; |
1044 | ||
1045 | if (cpu->kvm_vtime_dirty) { | |
1046 | return; | |
1047 | } | |
1048 | ||
76acc987 | 1049 | ret = kvm_get_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); |
e5ac4200 AJ |
1050 | if (ret) { |
1051 | error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); | |
1052 | abort(); | |
1053 | } | |
1054 | ||
1055 | cpu->kvm_vtime_dirty = true; | |
1056 | } | |
1057 | ||
46512471 RH |
1058 | /** |
1059 | * kvm_arm_put_virtual_time: | |
76acc987 | 1060 | * @cpu: ARMCPU |
46512471 RH |
1061 | * |
1062 | * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. | |
1063 | */ | |
76acc987 | 1064 | static void kvm_arm_put_virtual_time(ARMCPU *cpu) |
e5ac4200 | 1065 | { |
e5ac4200 AJ |
1066 | int ret; |
1067 | ||
1068 | if (!cpu->kvm_vtime_dirty) { | |
1069 | return; | |
1070 | } | |
1071 | ||
76acc987 | 1072 | ret = kvm_set_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); |
e5ac4200 AJ |
1073 | if (ret) { |
1074 | error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); | |
1075 | abort(); | |
1076 | } | |
1077 | ||
1078 | cpu->kvm_vtime_dirty = false; | |
1079 | } | |
1080 | ||
353e03cd RH |
1081 | /** |
1082 | * kvm_put_vcpu_events: | |
1083 | * @cpu: ARMCPU | |
1084 | * | |
1085 | * Put VCPU related state to kvm. | |
1086 | * | |
1087 | * Returns: 0 if success else < 0 error code | |
1088 | */ | |
1089 | static int kvm_put_vcpu_events(ARMCPU *cpu) | |
202ccb6b DG |
1090 | { |
1091 | CPUARMState *env = &cpu->env; | |
1092 | struct kvm_vcpu_events events; | |
1093 | int ret; | |
1094 | ||
1095 | if (!kvm_has_vcpu_events()) { | |
1096 | return 0; | |
1097 | } | |
1098 | ||
1099 | memset(&events, 0, sizeof(events)); | |
1100 | events.exception.serror_pending = env->serror.pending; | |
1101 | ||
1102 | /* Inject SError to guest with specified syndrome if host kernel | |
1103 | * supports it, otherwise inject SError without syndrome. | |
1104 | */ | |
1105 | if (cap_has_inject_serror_esr) { | |
1106 | events.exception.serror_has_esr = env->serror.has_esr; | |
1107 | events.exception.serror_esr = env->serror.esr; | |
1108 | } | |
1109 | ||
1110 | ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); | |
1111 | if (ret) { | |
1112 | error_report("failed to put vcpu events"); | |
1113 | } | |
1114 | ||
1115 | return ret; | |
1116 | } | |
1117 | ||
353e03cd RH |
1118 | /** |
1119 | * kvm_get_vcpu_events: | |
1120 | * @cpu: ARMCPU | |
1121 | * | |
1122 | * Get VCPU related state from kvm. | |
1123 | * | |
1124 | * Returns: 0 if success else < 0 error code | |
1125 | */ | |
1126 | static int kvm_get_vcpu_events(ARMCPU *cpu) | |
202ccb6b DG |
1127 | { |
1128 | CPUARMState *env = &cpu->env; | |
1129 | struct kvm_vcpu_events events; | |
1130 | int ret; | |
1131 | ||
1132 | if (!kvm_has_vcpu_events()) { | |
1133 | return 0; | |
1134 | } | |
1135 | ||
1136 | memset(&events, 0, sizeof(events)); | |
1137 | ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); | |
1138 | if (ret) { | |
1139 | error_report("failed to get vcpu events"); | |
1140 | return ret; | |
1141 | } | |
1142 | ||
1143 | env->serror.pending = events.exception.serror_pending; | |
1144 | env->serror.has_esr = events.exception.serror_has_esr; | |
1145 | env->serror.esr = events.exception.serror_esr; | |
1146 | ||
1147 | return 0; | |
1148 | } | |
1149 | ||
20c83dc9 RH |
#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC, bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 *
 * In the TTBCR.EAE == 0 case only DFSR[10] and DFSR[3:0] contribute to
 * the result: the shifted value is masked down to FS[4] and the low part
 * to FS[3:0], so that no other bit of @v can leak into the status code.
 */
#define ESR_DFSC(aarch64, lpae, v)                      \
    (((aarch64) || (lpae)) ? ((v) & 0x3F)               \
               : ((((v) >> 6) & 0x10) | ((v) & 0xF)))

/* Fault status code expected for an external abort in each format */
#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)
1170 | ||
1171 | /** | |
1172 | * kvm_arm_verify_ext_dabt_pending: | |
1173 | * @cs: CPUState | |
1174 | * | |
1175 | * Verify the fault status code wrt the Ext DABT injection | |
1176 | * | |
1177 | * Returns: true if the fault status code is as expected, false otherwise | |
1178 | */ | |
1179 | static bool kvm_arm_verify_ext_dabt_pending(CPUState *cs) | |
1180 | { | |
1181 | uint64_t dfsr_val; | |
1182 | ||
1183 | if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) { | |
1184 | ARMCPU *cpu = ARM_CPU(cs); | |
1185 | CPUARMState *env = &cpu->env; | |
1186 | int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64); | |
1187 | int lpae = 0; | |
1188 | ||
1189 | if (!aarch64_mode) { | |
1190 | uint64_t ttbcr; | |
1191 | ||
1192 | if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) { | |
1193 | lpae = arm_feature(env, ARM_FEATURE_LPAE) | |
1194 | && (ttbcr & TTBCR_EAE); | |
1195 | } | |
1196 | } | |
1197 | /* | |
1198 | * The verification here is based on the DFSC bits | |
1199 | * of the ESR_EL1 reg only | |
1200 | */ | |
1201 | return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) == | |
1202 | ESR_DFSC_EXTABT(aarch64_mode, lpae)); | |
1203 | } | |
1204 | return false; | |
1205 | } | |
1206 | ||
494b00c7 CD |
1207 | void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) |
1208 | { | |
1711bfa5 BM |
1209 | ARMCPU *cpu = ARM_CPU(cs); |
1210 | CPUARMState *env = &cpu->env; | |
1211 | ||
1212 | if (unlikely(env->ext_dabt_raised)) { | |
1213 | /* | |
1214 | * Verifying that the ext DABT has been properly injected, | |
1215 | * otherwise risking indefinitely re-running the faulting instruction | |
1216 | * Covering a very narrow case for kernels 5.5..5.5.4 | |
1217 | * when injected abort was misconfigured to be | |
1218 | * an IMPLEMENTATION DEFINED exception (for 32-bit EL1) | |
1219 | */ | |
1220 | if (!arm_feature(env, ARM_FEATURE_AARCH64) && | |
1221 | unlikely(!kvm_arm_verify_ext_dabt_pending(cs))) { | |
1222 | ||
1223 | error_report("Data abort exception with no valid ISS generated by " | |
1224 | "guest memory access. KVM unable to emulate faulting " | |
1225 | "instruction. Failed to inject an external data abort " | |
1226 | "into the guest."); | |
1227 | abort(); | |
1228 | } | |
1229 | /* Clear the status */ | |
1230 | env->ext_dabt_raised = 0; | |
1231 | } | |
494b00c7 CD |
1232 | } |
1233 | ||
4c663752 | 1234 | MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) |
494b00c7 | 1235 | { |
5d721b78 AG |
1236 | ARMCPU *cpu; |
1237 | uint32_t switched_level; | |
1238 | ||
1239 | if (kvm_irqchip_in_kernel()) { | |
1240 | /* | |
1241 | * We only need to sync timer states with user-space interrupt | |
1242 | * controllers, so return early and save cycles if we don't. | |
1243 | */ | |
1244 | return MEMTXATTRS_UNSPECIFIED; | |
1245 | } | |
1246 | ||
1247 | cpu = ARM_CPU(cs); | |
1248 | ||
1249 | /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */ | |
1250 | if (run->s.regs.device_irq_level != cpu->device_irq_level) { | |
1251 | switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level; | |
1252 | ||
1253 | qemu_mutex_lock_iothread(); | |
1254 | ||
1255 | if (switched_level & KVM_ARM_DEV_EL1_VTIMER) { | |
1256 | qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT], | |
1257 | !!(run->s.regs.device_irq_level & | |
1258 | KVM_ARM_DEV_EL1_VTIMER)); | |
1259 | switched_level &= ~KVM_ARM_DEV_EL1_VTIMER; | |
1260 | } | |
1261 | ||
1262 | if (switched_level & KVM_ARM_DEV_EL1_PTIMER) { | |
1263 | qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS], | |
1264 | !!(run->s.regs.device_irq_level & | |
1265 | KVM_ARM_DEV_EL1_PTIMER)); | |
1266 | switched_level &= ~KVM_ARM_DEV_EL1_PTIMER; | |
1267 | } | |
1268 | ||
b1659527 AJ |
1269 | if (switched_level & KVM_ARM_DEV_PMU) { |
1270 | qemu_set_irq(cpu->pmu_interrupt, | |
1271 | !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU)); | |
1272 | switched_level &= ~KVM_ARM_DEV_PMU; | |
1273 | } | |
5d721b78 AG |
1274 | |
1275 | if (switched_level) { | |
1276 | qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n", | |
1277 | __func__, switched_level); | |
1278 | } | |
1279 | ||
1280 | /* We also mark unknown levels as processed to not waste cycles */ | |
1281 | cpu->device_irq_level = run->s.regs.device_irq_level; | |
1282 | qemu_mutex_unlock_iothread(); | |
1283 | } | |
1284 | ||
4c663752 | 1285 | return MEMTXATTRS_UNSPECIFIED; |
494b00c7 CD |
1286 | } |
1287 | ||
396b6c50 | 1288 | static void kvm_arm_vm_state_change(void *opaque, bool running, RunState state) |
e5ac4200 | 1289 | { |
76acc987 | 1290 | ARMCPU *cpu = opaque; |
e5ac4200 AJ |
1291 | |
1292 | if (running) { | |
1293 | if (cpu->kvm_adjvtime) { | |
76acc987 | 1294 | kvm_arm_put_virtual_time(cpu); |
e5ac4200 AJ |
1295 | } |
1296 | } else { | |
1297 | if (cpu->kvm_adjvtime) { | |
76acc987 | 1298 | kvm_arm_get_virtual_time(cpu); |
e5ac4200 AJ |
1299 | } |
1300 | } | |
1301 | } | |
2ecb2027 | 1302 | |
694bcaa8 BM |
1303 | /** |
1304 | * kvm_arm_handle_dabt_nisv: | |
1305 | * @cs: CPUState | |
1306 | * @esr_iss: ISS encoding (limited) for the exception from Data Abort | |
1307 | * ISV bit set to '0b0' -> no valid instruction syndrome | |
1308 | * @fault_ipa: faulting address for the synchronous data abort | |
1309 | * | |
1310 | * Returns: 0 if the exception has been handled, < 0 otherwise | |
1311 | */ | |
1312 | static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, | |
1313 | uint64_t fault_ipa) | |
1314 | { | |
1711bfa5 BM |
1315 | ARMCPU *cpu = ARM_CPU(cs); |
1316 | CPUARMState *env = &cpu->env; | |
694bcaa8 BM |
1317 | /* |
1318 | * Request KVM to inject the external data abort into the guest | |
1319 | */ | |
1320 | if (cap_has_inject_ext_dabt) { | |
1321 | struct kvm_vcpu_events events = { }; | |
1322 | /* | |
1323 | * The external data abort event will be handled immediately by KVM | |
1324 | * using the address fault that triggered the exit on given VCPU. | |
1325 | * Requesting injection of the external data abort does not rely | |
1326 | * on any other VCPU state. Therefore, in this particular case, the VCPU | |
1327 | * synchronization can be exceptionally skipped. | |
1328 | */ | |
1329 | events.exception.ext_dabt_pending = 1; | |
1330 | /* KVM_CAP_ARM_INJECT_EXT_DABT implies KVM_CAP_VCPU_EVENTS */ | |
1711bfa5 BM |
1331 | if (!kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events)) { |
1332 | env->ext_dabt_raised = 1; | |
1333 | return 0; | |
1334 | } | |
694bcaa8 BM |
1335 | } else { |
1336 | error_report("Data abort exception triggered by guest memory access " | |
1337 | "at physical address: 0x" TARGET_FMT_lx, | |
1338 | (target_ulong)fault_ipa); | |
1339 | error_printf("KVM unable to emulate faulting instruction.\n"); | |
1340 | } | |
1341 | return -1; | |
1342 | } | |
1343 | ||
5cba8f26 RH |
1344 | /** |
1345 | * kvm_arm_handle_debug: | |
1346 | * @cs: CPUState | |
1347 | * @debug_exit: debug part of the KVM exit structure | |
1348 | * | |
1349 | * Returns: TRUE if the debug exception was handled. | |
1350 | * | |
1351 | * See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register | |
1352 | * | |
1353 | * To minimise translating between kernel and user-space the kernel | |
1354 | * ABI just provides user-space with the full exception syndrome | |
1355 | * register value to be decoded in QEMU. | |
1356 | */ | |
1357 | static bool kvm_arm_handle_debug(CPUState *cs, | |
1358 | struct kvm_debug_exit_arch *debug_exit) | |
1359 | { | |
1360 | int hsr_ec = syn_get_ec(debug_exit->hsr); | |
1361 | ARMCPU *cpu = ARM_CPU(cs); | |
1362 | CPUARMState *env = &cpu->env; | |
1363 | ||
1364 | /* Ensure PC is synchronised */ | |
1365 | kvm_cpu_synchronize_state(cs); | |
1366 | ||
1367 | switch (hsr_ec) { | |
1368 | case EC_SOFTWARESTEP: | |
1369 | if (cs->singlestep_enabled) { | |
1370 | return true; | |
1371 | } else { | |
1372 | /* | |
1373 | * The kernel should have suppressed the guest's ability to | |
1374 | * single step at this point so something has gone wrong. | |
1375 | */ | |
1376 | error_report("%s: guest single-step while debugging unsupported" | |
1377 | " (%"PRIx64", %"PRIx32")", | |
1378 | __func__, env->pc, debug_exit->hsr); | |
1379 | return false; | |
1380 | } | |
1381 | break; | |
1382 | case EC_AA64_BKPT: | |
1383 | if (kvm_find_sw_breakpoint(cs, env->pc)) { | |
1384 | return true; | |
1385 | } | |
1386 | break; | |
1387 | case EC_BREAKPOINT: | |
1388 | if (find_hw_breakpoint(cs, env->pc)) { | |
1389 | return true; | |
1390 | } | |
1391 | break; | |
1392 | case EC_WATCHPOINT: | |
1393 | { | |
1394 | CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far); | |
1395 | if (wp) { | |
1396 | cs->watchpoint_hit = wp; | |
1397 | return true; | |
1398 | } | |
1399 | break; | |
1400 | } | |
1401 | default: | |
1402 | error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")", | |
1403 | __func__, debug_exit->hsr, env->pc); | |
1404 | } | |
1405 | ||
1406 | /* If we are not handling the debug exception it must belong to | |
1407 | * the guest. Let's re-use the existing TCG interrupt code to set | |
1408 | * everything up properly. | |
1409 | */ | |
1410 | cs->exception_index = EXCP_BKPT; | |
1411 | env->exception.syndrome = debug_exit->hsr; | |
1412 | env->exception.vaddress = debug_exit->far; | |
1413 | env->exception.target_el = 1; | |
1414 | qemu_mutex_lock_iothread(); | |
1415 | arm_cpu_do_interrupt(cs); | |
1416 | qemu_mutex_unlock_iothread(); | |
1417 | ||
1418 | return false; | |
1419 | } | |
1420 | ||
494b00c7 CD |
1421 | int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) |
1422 | { | |
2ecb2027 AB |
1423 | int ret = 0; |
1424 | ||
1425 | switch (run->exit_reason) { | |
1426 | case KVM_EXIT_DEBUG: | |
1427 | if (kvm_arm_handle_debug(cs, &run->debug.arch)) { | |
1428 | ret = EXCP_DEBUG; | |
1429 | } /* otherwise return to guest */ | |
1430 | break; | |
694bcaa8 BM |
1431 | case KVM_EXIT_ARM_NISV: |
1432 | /* External DABT with no valid iss to decode */ | |
1433 | ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, | |
1434 | run->arm_nisv.fault_ipa); | |
1435 | break; | |
2ecb2027 AB |
1436 | default: |
1437 | qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", | |
1438 | __func__, run->exit_reason); | |
1439 | break; | |
1440 | } | |
1441 | return ret; | |
494b00c7 CD |
1442 | } |
1443 | ||
494b00c7 CD |
1444 | bool kvm_arch_stop_on_emulation_error(CPUState *cs) |
1445 | { | |
1446 | return true; | |
1447 | } | |
1448 | ||
1449 | int kvm_arch_process_async_events(CPUState *cs) | |
1450 | { | |
1451 | return 0; | |
1452 | } | |
1453 | ||
ea79c599 RH |
1454 | /** |
1455 | * kvm_arm_hw_debug_active: | |
1456 | * @cs: CPU State | |
1457 | * | |
1458 | * Return: TRUE if any hardware breakpoints in use. | |
1459 | */ | |
1460 | static bool kvm_arm_hw_debug_active(CPUState *cs) | |
1461 | { | |
1462 | return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); | |
1463 | } | |
1464 | ||
ec4145f7 RH |
1465 | /** |
1466 | * kvm_arm_copy_hw_debug_data: | |
1467 | * @ptr: kvm_guest_debug_arch structure | |
1468 | * | |
1469 | * Copy the architecture specific debug registers into the | |
1470 | * kvm_guest_debug ioctl structure. | |
1471 | */ | |
1472 | static void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr) | |
1473 | { | |
1474 | int i; | |
1475 | memset(ptr, 0, sizeof(struct kvm_guest_debug_arch)); | |
1476 | ||
1477 | for (i = 0; i < max_hw_wps; i++) { | |
1478 | HWWatchpoint *wp = get_hw_wp(i); | |
1479 | ptr->dbg_wcr[i] = wp->wcr; | |
1480 | ptr->dbg_wvr[i] = wp->wvr; | |
1481 | } | |
1482 | for (i = 0; i < max_hw_bps; i++) { | |
1483 | HWBreakpoint *bp = get_hw_bp(i); | |
1484 | ptr->dbg_bcr[i] = bp->bcr; | |
1485 | ptr->dbg_bvr[i] = bp->bvr; | |
1486 | } | |
1487 | } | |
1488 | ||
494b00c7 CD |
1489 | void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) |
1490 | { | |
2ecb2027 AB |
1491 | if (kvm_sw_breakpoints_active(cs)) { |
1492 | dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; | |
1493 | } | |
e4482ab7 AB |
1494 | if (kvm_arm_hw_debug_active(cs)) { |
1495 | dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW; | |
1496 | kvm_arm_copy_hw_debug_data(&dbg->arch); | |
1497 | } | |
494b00c7 | 1498 | } |
b3a1c626 AK |
1499 | |
1500 | void kvm_arch_init_irq_routing(KVMState *s) | |
1501 | { | |
1502 | } | |
1da41cc1 | 1503 | |
4376c40d | 1504 | int kvm_arch_irqchip_create(KVMState *s) |
1da41cc1 | 1505 | { |
4376c40d | 1506 | if (kvm_kernel_irqchip_split()) { |
47c182fe | 1507 | error_report("-machine kernel_irqchip=split is not supported on ARM."); |
4376c40d | 1508 | exit(1); |
15eafc2e PB |
1509 | } |
1510 | ||
1da41cc1 CD |
1511 | /* If we can create the VGIC using the newer device control API, we |
1512 | * let the device do this when it initializes itself, otherwise we | |
1513 | * fall back to the old API */ | |
34e85cd9 PF |
1514 | return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); |
1515 | } | |
1da41cc1 | 1516 | |
34e85cd9 PF |
1517 | int kvm_arm_vgic_probe(void) |
1518 | { | |
d45efe47 EA |
1519 | int val = 0; |
1520 | ||
34e85cd9 PF |
1521 | if (kvm_create_device(kvm_state, |
1522 | KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) { | |
d45efe47 EA |
1523 | val |= KVM_ARM_VGIC_V3; |
1524 | } | |
1525 | if (kvm_create_device(kvm_state, | |
1526 | KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) { | |
1527 | val |= KVM_ARM_VGIC_V2; | |
1da41cc1 | 1528 | } |
d45efe47 | 1529 | return val; |
1da41cc1 | 1530 | } |
9e03a040 | 1531 | |
f6530926 EA |
1532 | int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level) |
1533 | { | |
1534 | int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq; | |
1535 | int cpu_idx1 = cpu % 256; | |
1536 | int cpu_idx2 = cpu / 256; | |
1537 | ||
1538 | kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) | | |
1539 | (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT); | |
1540 | ||
1541 | return kvm_set_irq(kvm_state, kvm_irq, !!level); | |
1542 | } | |
1543 | ||
9e03a040 | 1544 | int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, |
dc9f06ca | 1545 | uint64_t address, uint32_t data, PCIDevice *dev) |
9e03a040 | 1546 | { |
b05c81d2 EA |
1547 | AddressSpace *as = pci_device_iommu_address_space(dev); |
1548 | hwaddr xlat, len, doorbell_gpa; | |
1549 | MemoryRegionSection mrs; | |
1550 | MemoryRegion *mr; | |
b05c81d2 EA |
1551 | |
1552 | if (as == &address_space_memory) { | |
1553 | return 0; | |
1554 | } | |
1555 | ||
1556 | /* MSI doorbell address is translated by an IOMMU */ | |
1557 | ||
dfa0d9b8 HM |
1558 | RCU_READ_LOCK_GUARD(); |
1559 | ||
bc6b1cec PM |
1560 | mr = address_space_translate(as, address, &xlat, &len, true, |
1561 | MEMTXATTRS_UNSPECIFIED); | |
dfa0d9b8 | 1562 | |
b05c81d2 | 1563 | if (!mr) { |
dfa0d9b8 | 1564 | return 1; |
b05c81d2 | 1565 | } |
dfa0d9b8 | 1566 | |
b05c81d2 | 1567 | mrs = memory_region_find(mr, xlat, 1); |
dfa0d9b8 | 1568 | |
b05c81d2 | 1569 | if (!mrs.mr) { |
dfa0d9b8 | 1570 | return 1; |
b05c81d2 EA |
1571 | } |
1572 | ||
1573 | doorbell_gpa = mrs.offset_within_address_space; | |
1574 | memory_region_unref(mrs.mr); | |
1575 | ||
1576 | route->u.msi.address_lo = doorbell_gpa; | |
1577 | route->u.msi.address_hi = doorbell_gpa >> 32; | |
1578 | ||
1579 | trace_kvm_arm_fixup_msi_route(address, doorbell_gpa); | |
1580 | ||
dfa0d9b8 | 1581 | return 0; |
9e03a040 | 1582 | } |
1850b6b7 | 1583 | |
38d87493 PX |
1584 | int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, |
1585 | int vector, PCIDevice *dev) | |
1586 | { | |
1587 | return 0; | |
1588 | } | |
1589 | ||
/* Nothing to do after a virq has been released; always returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
1594 | ||
1850b6b7 EA |
/*
 * Convert MSI data to a GSI number: subtract 32 and keep the low
 * 16 bits of the result.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t shifted = data - 32;

    return shifted & 0xffff;
}
92a5199b TL |
1599 | |
/* Always reports that CPUs are resettable. */
bool kvm_arch_cpu_check_are_resettable(void)
{
    return true;
}
3dba0a33 | 1604 | |
c8f2eb5d SK |
1605 | static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, |
1606 | const char *name, void *opaque, | |
1607 | Error **errp) | |
1608 | { | |
1609 | KVMState *s = KVM_STATE(obj); | |
1610 | uint64_t value = s->kvm_eager_split_size; | |
1611 | ||
1612 | visit_type_size(v, name, &value, errp); | |
1613 | } | |
1614 | ||
1615 | static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v, | |
1616 | const char *name, void *opaque, | |
1617 | Error **errp) | |
1618 | { | |
1619 | KVMState *s = KVM_STATE(obj); | |
1620 | uint64_t value; | |
1621 | ||
1622 | if (s->fd != -1) { | |
1623 | error_setg(errp, "Unable to set early-split-size after KVM has been initialized"); | |
1624 | return; | |
1625 | } | |
1626 | ||
1627 | if (!visit_type_size(v, name, &value, errp)) { | |
1628 | return; | |
1629 | } | |
1630 | ||
1631 | if (value && !is_power_of_2(value)) { | |
1632 | error_setg(errp, "early-split-size must be a power of two"); | |
1633 | return; | |
1634 | } | |
1635 | ||
1636 | s->kvm_eager_split_size = value; | |
1637 | } | |
1638 | ||
3dba0a33 PB |
1639 | void kvm_arch_accel_class_init(ObjectClass *oc) |
1640 | { | |
c8f2eb5d SK |
1641 | object_class_property_add(oc, "eager-split-size", "size", |
1642 | kvm_arch_get_eager_split_size, | |
1643 | kvm_arch_set_eager_split_size, NULL, NULL); | |
1644 | ||
1645 | object_class_property_set_description(oc, "eager-split-size", | |
1646 | "Eager Page Split chunk size for hugepages. (default: 0, disabled)"); | |
3dba0a33 | 1647 | } |
de3c9601 RH |
1648 | |
1649 | int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) | |
1650 | { | |
1651 | switch (type) { | |
1652 | case GDB_BREAKPOINT_HW: | |
1653 | return insert_hw_breakpoint(addr); | |
1654 | break; | |
1655 | case GDB_WATCHPOINT_READ: | |
1656 | case GDB_WATCHPOINT_WRITE: | |
1657 | case GDB_WATCHPOINT_ACCESS: | |
1658 | return insert_hw_watchpoint(addr, len, type); | |
1659 | default: | |
1660 | return -ENOSYS; | |
1661 | } | |
1662 | } | |
1663 | ||
1664 | int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) | |
1665 | { | |
1666 | switch (type) { | |
1667 | case GDB_BREAKPOINT_HW: | |
1668 | return delete_hw_breakpoint(addr); | |
1669 | case GDB_WATCHPOINT_READ: | |
1670 | case GDB_WATCHPOINT_WRITE: | |
1671 | case GDB_WATCHPOINT_ACCESS: | |
1672 | return delete_hw_watchpoint(addr, len, type); | |
1673 | default: | |
1674 | return -ENOSYS; | |
1675 | } | |
1676 | } | |
1677 | ||
1678 | void kvm_arch_remove_all_hw_breakpoints(void) | |
1679 | { | |
1680 | if (cur_hw_wps > 0) { | |
1681 | g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); | |
1682 | } | |
1683 | if (cur_hw_bps > 0) { | |
1684 | g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); | |
1685 | } | |
1686 | } | |
1687 | ||
e77034f7 | 1688 | static bool kvm_arm_set_device_attr(ARMCPU *cpu, struct kvm_device_attr *attr, |
de3c9601 RH |
1689 | const char *name) |
1690 | { | |
1691 | int err; | |
1692 | ||
e77034f7 | 1693 | err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); |
de3c9601 RH |
1694 | if (err != 0) { |
1695 | error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); | |
1696 | return false; | |
1697 | } | |
1698 | ||
e77034f7 | 1699 | err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr); |
de3c9601 RH |
1700 | if (err != 0) { |
1701 | error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); | |
1702 | return false; | |
1703 | } | |
1704 | ||
1705 | return true; | |
1706 | } | |
1707 | ||
d344f5ba | 1708 | void kvm_arm_pmu_init(ARMCPU *cpu) |
de3c9601 RH |
1709 | { |
1710 | struct kvm_device_attr attr = { | |
1711 | .group = KVM_ARM_VCPU_PMU_V3_CTRL, | |
1712 | .attr = KVM_ARM_VCPU_PMU_V3_INIT, | |
1713 | }; | |
1714 | ||
d344f5ba | 1715 | if (!cpu->has_pmu) { |
de3c9601 RH |
1716 | return; |
1717 | } | |
d344f5ba | 1718 | if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { |
de3c9601 RH |
1719 | error_report("failed to init PMU"); |
1720 | abort(); | |
1721 | } | |
1722 | } | |
1723 | ||
5ed84f3b | 1724 | void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) |
de3c9601 RH |
1725 | { |
1726 | struct kvm_device_attr attr = { | |
1727 | .group = KVM_ARM_VCPU_PMU_V3_CTRL, | |
1728 | .addr = (intptr_t)&irq, | |
1729 | .attr = KVM_ARM_VCPU_PMU_V3_IRQ, | |
1730 | }; | |
1731 | ||
5ed84f3b | 1732 | if (!cpu->has_pmu) { |
de3c9601 RH |
1733 | return; |
1734 | } | |
5ed84f3b | 1735 | if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { |
de3c9601 RH |
1736 | error_report("failed to set irq for PMU"); |
1737 | abort(); | |
1738 | } | |
1739 | } | |
1740 | ||
55503372 | 1741 | void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) |
de3c9601 RH |
1742 | { |
1743 | struct kvm_device_attr attr = { | |
1744 | .group = KVM_ARM_VCPU_PVTIME_CTRL, | |
1745 | .attr = KVM_ARM_VCPU_PVTIME_IPA, | |
1746 | .addr = (uint64_t)&ipa, | |
1747 | }; | |
1748 | ||
55503372 | 1749 | if (cpu->kvm_steal_time == ON_OFF_AUTO_OFF) { |
de3c9601 RH |
1750 | return; |
1751 | } | |
55503372 | 1752 | if (!kvm_arm_set_device_attr(cpu, &attr, "PVTIME IPA")) { |
de3c9601 RH |
1753 | error_report("failed to init PVTIME IPA"); |
1754 | abort(); | |
1755 | } | |
1756 | } | |
1757 | ||
1758 | void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) | |
1759 | { | |
1760 | bool has_steal_time = kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); | |
1761 | ||
1762 | if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) { | |
1763 | if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1764 | cpu->kvm_steal_time = ON_OFF_AUTO_OFF; | |
1765 | } else { | |
1766 | cpu->kvm_steal_time = ON_OFF_AUTO_ON; | |
1767 | } | |
1768 | } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) { | |
1769 | if (!has_steal_time) { | |
1770 | error_setg(errp, "'kvm-steal-time' cannot be enabled " | |
1771 | "on this host"); | |
1772 | return; | |
1773 | } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1774 | /* | |
1775 | * DEN0057A chapter 2 says "This specification only covers | |
1776 | * systems in which the Execution state of the hypervisor | |
1777 | * as well as EL1 of virtual machines is AArch64.". And, | |
1778 | * to ensure that, the smc/hvc calls are only specified as | |
1779 | * smc64/hvc64. | |
1780 | */ | |
1781 | error_setg(errp, "'kvm-steal-time' cannot be enabled " | |
1782 | "for AArch32 guests"); | |
1783 | return; | |
1784 | } | |
1785 | } | |
1786 | } | |
1787 | ||
1788 | bool kvm_arm_aarch32_supported(void) | |
1789 | { | |
1790 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); | |
1791 | } | |
1792 | ||
1793 | bool kvm_arm_sve_supported(void) | |
1794 | { | |
1795 | return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); | |
1796 | } | |
1797 | ||
1798 | QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); | |
1799 | ||
d6339282 | 1800 | uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) |
de3c9601 RH |
1801 | { |
1802 | /* Only call this function if kvm_arm_sve_supported() returns true. */ | |
1803 | static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; | |
1804 | static bool probed; | |
1805 | uint32_t vq = 0; | |
1806 | int i; | |
1807 | ||
1808 | /* | |
1809 | * KVM ensures all host CPUs support the same set of vector lengths. | |
1810 | * So we only need to create the scratch VCPUs once and then cache | |
1811 | * the results. | |
1812 | */ | |
1813 | if (!probed) { | |
1814 | struct kvm_vcpu_init init = { | |
1815 | .target = -1, | |
1816 | .features[0] = (1 << KVM_ARM_VCPU_SVE), | |
1817 | }; | |
1818 | struct kvm_one_reg reg = { | |
1819 | .id = KVM_REG_ARM64_SVE_VLS, | |
1820 | .addr = (uint64_t)&vls[0], | |
1821 | }; | |
1822 | int fdarray[3], ret; | |
1823 | ||
1824 | probed = true; | |
1825 | ||
1826 | if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) { | |
1827 | error_report("failed to create scratch VCPU with SVE enabled"); | |
1828 | abort(); | |
1829 | } | |
1830 | ret = ioctl(fdarray[2], KVM_GET_ONE_REG, ®); | |
1831 | kvm_arm_destroy_scratch_host_vcpu(fdarray); | |
1832 | if (ret) { | |
1833 | error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s", | |
1834 | strerror(errno)); | |
1835 | abort(); | |
1836 | } | |
1837 | ||
1838 | for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) { | |
1839 | if (vls[i]) { | |
1840 | vq = 64 - clz64(vls[i]) + i * 64; | |
1841 | break; | |
1842 | } | |
1843 | } | |
1844 | if (vq > ARM_MAX_VQ) { | |
1845 | warn_report("KVM supports vector lengths larger than " | |
1846 | "QEMU can enable"); | |
1847 | vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ); | |
1848 | } | |
1849 | } | |
1850 | ||
1851 | return vls[0]; | |
1852 | } | |
1853 | ||
bc1b09b3 | 1854 | static int kvm_arm_sve_set_vls(ARMCPU *cpu) |
de3c9601 | 1855 | { |
de3c9601 RH |
1856 | uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map }; |
1857 | ||
1858 | assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); | |
1859 | ||
bc1b09b3 | 1860 | return kvm_set_one_reg(CPU(cpu), KVM_REG_ARM64_SVE_VLS, &vls[0]); |
de3c9601 RH |
1861 | } |
1862 | ||
1863 | #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 | |
1864 | ||
1865 | int kvm_arch_init_vcpu(CPUState *cs) | |
1866 | { | |
1867 | int ret; | |
1868 | uint64_t mpidr; | |
1869 | ARMCPU *cpu = ARM_CPU(cs); | |
1870 | CPUARMState *env = &cpu->env; | |
1871 | uint64_t psciver; | |
1872 | ||
1873 | if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || | |
1874 | !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { | |
1875 | error_report("KVM is not supported for this guest CPU type"); | |
1876 | return -EINVAL; | |
1877 | } | |
1878 | ||
76acc987 | 1879 | qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cpu); |
de3c9601 RH |
1880 | |
1881 | /* Determine init features for this CPU */ | |
1882 | memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); | |
1883 | if (cs->start_powered_off) { | |
1884 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; | |
1885 | } | |
1886 | if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { | |
1887 | cpu->psci_version = QEMU_PSCI_VERSION_0_2; | |
1888 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; | |
1889 | } | |
1890 | if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { | |
1891 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; | |
1892 | } | |
1893 | if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { | |
1894 | cpu->has_pmu = false; | |
1895 | } | |
1896 | if (cpu->has_pmu) { | |
1897 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; | |
1898 | } else { | |
1899 | env->features &= ~(1ULL << ARM_FEATURE_PMU); | |
1900 | } | |
1901 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
1902 | assert(kvm_arm_sve_supported()); | |
1903 | cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE; | |
1904 | } | |
1905 | if (cpu_isar_feature(aa64_pauth, cpu)) { | |
1906 | cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | | |
1907 | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); | |
1908 | } | |
1909 | ||
1910 | /* Do KVM_ARM_VCPU_INIT ioctl */ | |
bbb22d58 | 1911 | ret = kvm_arm_vcpu_init(cpu); |
de3c9601 RH |
1912 | if (ret) { |
1913 | return ret; | |
1914 | } | |
1915 | ||
1916 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
bc1b09b3 | 1917 | ret = kvm_arm_sve_set_vls(cpu); |
de3c9601 RH |
1918 | if (ret) { |
1919 | return ret; | |
1920 | } | |
0d31a631 | 1921 | ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_SVE); |
de3c9601 RH |
1922 | if (ret) { |
1923 | return ret; | |
1924 | } | |
1925 | } | |
1926 | ||
1927 | /* | |
1928 | * KVM reports the exact PSCI version it is implementing via a | |
1929 | * special sysreg. If it is present, use its contents to determine | |
1930 | * what to report to the guest in the dtb (it is the PSCI version, | |
1931 | * in the same 15-bits major 16-bits minor format that PSCI_VERSION | |
1932 | * returns). | |
1933 | */ | |
1934 | if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) { | |
1935 | cpu->psci_version = psciver; | |
1936 | } | |
1937 | ||
1938 | /* | |
1939 | * When KVM is in use, PSCI is emulated in-kernel and not by qemu. | |
1940 | * Currently KVM has its own idea about MPIDR assignment, so we | |
1941 | * override our defaults with what we get from KVM. | |
1942 | */ | |
1943 | ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); | |
1944 | if (ret) { | |
1945 | return ret; | |
1946 | } | |
1947 | cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK; | |
1948 | ||
de3c9601 RH |
1949 | return kvm_arm_init_cpreg_list(cpu); |
1950 | } | |
1951 | ||
1952 | int kvm_arch_destroy_vcpu(CPUState *cs) | |
1953 | { | |
1954 | return 0; | |
1955 | } | |
1956 | ||
1957 | /* Callers must hold the iothread mutex lock */ | |
1958 | static void kvm_inject_arm_sea(CPUState *c) | |
1959 | { | |
1960 | ARMCPU *cpu = ARM_CPU(c); | |
1961 | CPUARMState *env = &cpu->env; | |
1962 | uint32_t esr; | |
1963 | bool same_el; | |
1964 | ||
1965 | c->exception_index = EXCP_DATA_ABORT; | |
1966 | env->exception.target_el = 1; | |
1967 | ||
1968 | /* | |
1969 | * Set the DFSC to synchronous external abort and set FnV to not valid, | |
1970 | * this will tell guest the FAR_ELx is UNKNOWN for this abort. | |
1971 | */ | |
1972 | same_el = arm_current_el(env) == env->exception.target_el; | |
1973 | esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10); | |
1974 | ||
1975 | env->exception.syndrome = esr; | |
1976 | ||
1977 | arm_cpu_do_interrupt(c); | |
1978 | } | |
1979 | ||
/*
 * Build KVM register IDs for members of the KVM core register block.
 * The three variants differ only in the register size they encode:
 * 64 bits, 128 bits and 32 bits respectively.
 */
#define AARCH64_CORE_REG(x) \
    (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
     KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x) \
    (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
     KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x) \
    (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
     KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
1988 | ||
1989 | static int kvm_arch_put_fpsimd(CPUState *cs) | |
1990 | { | |
1991 | CPUARMState *env = &ARM_CPU(cs)->env; | |
1992 | int i, ret; | |
1993 | ||
1994 | for (i = 0; i < 32; i++) { | |
1995 | uint64_t *q = aa64_vfp_qreg(env, i); | |
1996 | #if HOST_BIG_ENDIAN | |
1997 | uint64_t fp_val[2] = { q[1], q[0] }; | |
1998 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), | |
1999 | fp_val); | |
2000 | #else | |
2001 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); | |
2002 | #endif | |
2003 | if (ret) { | |
2004 | return ret; | |
2005 | } | |
2006 | } | |
2007 | ||
2008 | return 0; | |
2009 | } | |
2010 | ||
2011 | /* | |
2012 | * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits | |
2013 | * and PREGS and the FFR have a slice size of 256 bits. However we simply hard | |
2014 | * code the slice index to zero for now as it's unlikely we'll need more than | |
2015 | * one slice for quite some time. | |
2016 | */ | |
2017 | static int kvm_arch_put_sve(CPUState *cs) | |
2018 | { | |
2019 | ARMCPU *cpu = ARM_CPU(cs); | |
2020 | CPUARMState *env = &cpu->env; | |
2021 | uint64_t tmp[ARM_MAX_VQ * 2]; | |
2022 | uint64_t *r; | |
2023 | int n, ret; | |
2024 | ||
2025 | for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { | |
2026 | r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2); | |
2027 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); | |
2028 | if (ret) { | |
2029 | return ret; | |
2030 | } | |
2031 | } | |
2032 | ||
2033 | for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { | |
2034 | r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0], | |
2035 | DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2036 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); | |
2037 | if (ret) { | |
2038 | return ret; | |
2039 | } | |
2040 | } | |
2041 | ||
2042 | r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0], | |
2043 | DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2044 | ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); | |
2045 | if (ret) { | |
2046 | return ret; | |
2047 | } | |
2048 | ||
2049 | return 0; | |
2050 | } | |
2051 | ||
2052 | int kvm_arch_put_registers(CPUState *cs, int level) | |
2053 | { | |
2054 | uint64_t val; | |
2055 | uint32_t fpr; | |
2056 | int i, ret; | |
2057 | unsigned int el; | |
2058 | ||
2059 | ARMCPU *cpu = ARM_CPU(cs); | |
2060 | CPUARMState *env = &cpu->env; | |
2061 | ||
2062 | /* If we are in AArch32 mode then we need to copy the AArch32 regs to the | |
2063 | * AArch64 registers before pushing them out to 64-bit KVM. | |
2064 | */ | |
2065 | if (!is_a64(env)) { | |
2066 | aarch64_sync_32_to_64(env); | |
2067 | } | |
2068 | ||
2069 | for (i = 0; i < 31; i++) { | |
2070 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), | |
2071 | &env->xregs[i]); | |
2072 | if (ret) { | |
2073 | return ret; | |
2074 | } | |
2075 | } | |
2076 | ||
2077 | /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the | |
2078 | * QEMU side we keep the current SP in xregs[31] as well. | |
2079 | */ | |
2080 | aarch64_save_sp(env, 1); | |
2081 | ||
2082 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); | |
2083 | if (ret) { | |
2084 | return ret; | |
2085 | } | |
2086 | ||
2087 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); | |
2088 | if (ret) { | |
2089 | return ret; | |
2090 | } | |
2091 | ||
2092 | /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */ | |
2093 | if (is_a64(env)) { | |
2094 | val = pstate_read(env); | |
2095 | } else { | |
2096 | val = cpsr_read(env); | |
2097 | } | |
2098 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); | |
2099 | if (ret) { | |
2100 | return ret; | |
2101 | } | |
2102 | ||
2103 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); | |
2104 | if (ret) { | |
2105 | return ret; | |
2106 | } | |
2107 | ||
2108 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); | |
2109 | if (ret) { | |
2110 | return ret; | |
2111 | } | |
2112 | ||
2113 | /* Saved Program State Registers | |
2114 | * | |
2115 | * Before we restore from the banked_spsr[] array we need to | |
2116 | * ensure that any modifications to env->spsr are correctly | |
2117 | * reflected in the banks. | |
2118 | */ | |
2119 | el = arm_current_el(env); | |
2120 | if (el > 0 && !is_a64(env)) { | |
2121 | i = bank_number(env->uncached_cpsr & CPSR_M); | |
2122 | env->banked_spsr[i] = env->spsr; | |
2123 | } | |
2124 | ||
2125 | /* KVM 0-4 map to QEMU banks 1-5 */ | |
2126 | for (i = 0; i < KVM_NR_SPSR; i++) { | |
2127 | ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]), | |
2128 | &env->banked_spsr[i + 1]); | |
2129 | if (ret) { | |
2130 | return ret; | |
2131 | } | |
2132 | } | |
2133 | ||
2134 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
2135 | ret = kvm_arch_put_sve(cs); | |
2136 | } else { | |
2137 | ret = kvm_arch_put_fpsimd(cs); | |
2138 | } | |
2139 | if (ret) { | |
2140 | return ret; | |
2141 | } | |
2142 | ||
2143 | fpr = vfp_get_fpsr(env); | |
2144 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); | |
2145 | if (ret) { | |
2146 | return ret; | |
2147 | } | |
2148 | ||
2149 | fpr = vfp_get_fpcr(env); | |
2150 | ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); | |
2151 | if (ret) { | |
2152 | return ret; | |
2153 | } | |
2154 | ||
2155 | write_cpustate_to_list(cpu, true); | |
2156 | ||
2157 | if (!write_list_to_kvmstate(cpu, level)) { | |
2158 | return -EINVAL; | |
2159 | } | |
2160 | ||
2161 | /* | |
2162 | * Setting VCPU events should be triggered after syncing the registers | |
2163 | * to avoid overwriting potential changes made by KVM upon calling | |
2164 | * KVM_SET_VCPU_EVENTS ioctl | |
2165 | */ | |
2166 | ret = kvm_put_vcpu_events(cpu); | |
2167 | if (ret) { | |
2168 | return ret; | |
2169 | } | |
2170 | ||
71c34911 | 2171 | return kvm_arm_sync_mpstate_to_kvm(cpu); |
de3c9601 RH |
2172 | } |
2173 | ||
2174 | static int kvm_arch_get_fpsimd(CPUState *cs) | |
2175 | { | |
2176 | CPUARMState *env = &ARM_CPU(cs)->env; | |
2177 | int i, ret; | |
2178 | ||
2179 | for (i = 0; i < 32; i++) { | |
2180 | uint64_t *q = aa64_vfp_qreg(env, i); | |
2181 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); | |
2182 | if (ret) { | |
2183 | return ret; | |
2184 | } else { | |
2185 | #if HOST_BIG_ENDIAN | |
2186 | uint64_t t; | |
2187 | t = q[0], q[0] = q[1], q[1] = t; | |
2188 | #endif | |
2189 | } | |
2190 | } | |
2191 | ||
2192 | return 0; | |
2193 | } | |
2194 | ||
2195 | /* | |
2196 | * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits | |
2197 | * and PREGS and the FFR have a slice size of 256 bits. However we simply hard | |
2198 | * code the slice index to zero for now as it's unlikely we'll need more than | |
2199 | * one slice for quite some time. | |
2200 | */ | |
2201 | static int kvm_arch_get_sve(CPUState *cs) | |
2202 | { | |
2203 | ARMCPU *cpu = ARM_CPU(cs); | |
2204 | CPUARMState *env = &cpu->env; | |
2205 | uint64_t *r; | |
2206 | int n, ret; | |
2207 | ||
2208 | for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { | |
2209 | r = &env->vfp.zregs[n].d[0]; | |
2210 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); | |
2211 | if (ret) { | |
2212 | return ret; | |
2213 | } | |
2214 | sve_bswap64(r, r, cpu->sve_max_vq * 2); | |
2215 | } | |
2216 | ||
2217 | for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { | |
2218 | r = &env->vfp.pregs[n].p[0]; | |
2219 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); | |
2220 | if (ret) { | |
2221 | return ret; | |
2222 | } | |
2223 | sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2224 | } | |
2225 | ||
2226 | r = &env->vfp.pregs[FFR_PRED_NUM].p[0]; | |
2227 | ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); | |
2228 | if (ret) { | |
2229 | return ret; | |
2230 | } | |
2231 | sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); | |
2232 | ||
2233 | return 0; | |
2234 | } | |
2235 | ||
2236 | int kvm_arch_get_registers(CPUState *cs) | |
2237 | { | |
2238 | uint64_t val; | |
2239 | unsigned int el; | |
2240 | uint32_t fpr; | |
2241 | int i, ret; | |
2242 | ||
2243 | ARMCPU *cpu = ARM_CPU(cs); | |
2244 | CPUARMState *env = &cpu->env; | |
2245 | ||
2246 | for (i = 0; i < 31; i++) { | |
2247 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), | |
2248 | &env->xregs[i]); | |
2249 | if (ret) { | |
2250 | return ret; | |
2251 | } | |
2252 | } | |
2253 | ||
2254 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); | |
2255 | if (ret) { | |
2256 | return ret; | |
2257 | } | |
2258 | ||
2259 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); | |
2260 | if (ret) { | |
2261 | return ret; | |
2262 | } | |
2263 | ||
2264 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); | |
2265 | if (ret) { | |
2266 | return ret; | |
2267 | } | |
2268 | ||
2269 | env->aarch64 = ((val & PSTATE_nRW) == 0); | |
2270 | if (is_a64(env)) { | |
2271 | pstate_write(env, val); | |
2272 | } else { | |
2273 | cpsr_write(env, val, 0xffffffff, CPSRWriteRaw); | |
2274 | } | |
2275 | ||
2276 | /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the | |
2277 | * QEMU side we keep the current SP in xregs[31] as well. | |
2278 | */ | |
2279 | aarch64_restore_sp(env, 1); | |
2280 | ||
2281 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); | |
2282 | if (ret) { | |
2283 | return ret; | |
2284 | } | |
2285 | ||
2286 | /* If we are in AArch32 mode then we need to sync the AArch32 regs with the | |
2287 | * incoming AArch64 regs received from 64-bit KVM. | |
2288 | * We must perform this after all of the registers have been acquired from | |
2289 | * the kernel. | |
2290 | */ | |
2291 | if (!is_a64(env)) { | |
2292 | aarch64_sync_64_to_32(env); | |
2293 | } | |
2294 | ||
2295 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); | |
2296 | if (ret) { | |
2297 | return ret; | |
2298 | } | |
2299 | ||
2300 | /* Fetch the SPSR registers | |
2301 | * | |
2302 | * KVM SPSRs 0-4 map to QEMU banks 1-5 | |
2303 | */ | |
2304 | for (i = 0; i < KVM_NR_SPSR; i++) { | |
2305 | ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]), | |
2306 | &env->banked_spsr[i + 1]); | |
2307 | if (ret) { | |
2308 | return ret; | |
2309 | } | |
2310 | } | |
2311 | ||
2312 | el = arm_current_el(env); | |
2313 | if (el > 0 && !is_a64(env)) { | |
2314 | i = bank_number(env->uncached_cpsr & CPSR_M); | |
2315 | env->spsr = env->banked_spsr[i]; | |
2316 | } | |
2317 | ||
2318 | if (cpu_isar_feature(aa64_sve, cpu)) { | |
2319 | ret = kvm_arch_get_sve(cs); | |
2320 | } else { | |
2321 | ret = kvm_arch_get_fpsimd(cs); | |
2322 | } | |
2323 | if (ret) { | |
2324 | return ret; | |
2325 | } | |
2326 | ||
2327 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); | |
2328 | if (ret) { | |
2329 | return ret; | |
2330 | } | |
2331 | vfp_set_fpsr(env, fpr); | |
2332 | ||
2333 | ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); | |
2334 | if (ret) { | |
2335 | return ret; | |
2336 | } | |
2337 | vfp_set_fpcr(env, fpr); | |
2338 | ||
2339 | ret = kvm_get_vcpu_events(cpu); | |
2340 | if (ret) { | |
2341 | return ret; | |
2342 | } | |
2343 | ||
2344 | if (!write_kvmstate_to_list(cpu)) { | |
2345 | return -EINVAL; | |
2346 | } | |
2347 | /* Note that it's OK to have registers which aren't in CPUState, | |
2348 | * so we can ignore a failure return here. | |
2349 | */ | |
2350 | write_list_to_cpustate(cpu); | |
2351 | ||
71c34911 | 2352 | ret = kvm_arm_sync_mpstate_to_qemu(cpu); |
de3c9601 RH |
2353 | |
2354 | /* TODO: other registers */ | |
2355 | return ret; | |
2356 | } | |
2357 | ||
2358 | void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) | |
2359 | { | |
2360 | ram_addr_t ram_addr; | |
2361 | hwaddr paddr; | |
2362 | ||
2363 | assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); | |
2364 | ||
2365 | if (acpi_ghes_present() && addr) { | |
2366 | ram_addr = qemu_ram_addr_from_host(addr); | |
2367 | if (ram_addr != RAM_ADDR_INVALID && | |
2368 | kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { | |
2369 | kvm_hwpoison_page_add(ram_addr); | |
2370 | /* | |
2371 | * If this is a BUS_MCEERR_AR, we know we have been called | |
2372 | * synchronously from the vCPU thread, so we can easily | |
2373 | * synchronize the state and inject an error. | |
2374 | * | |
2375 | * TODO: we currently don't tell the guest at all about | |
2376 | * BUS_MCEERR_AO. In that case we might either be being | |
2377 | * called synchronously from the vCPU thread, or a bit | |
2378 | * later from the main thread, so doing the injection of | |
2379 | * the error would be more complicated. | |
2380 | */ | |
2381 | if (code == BUS_MCEERR_AR) { | |
2382 | kvm_cpu_synchronize_state(c); | |
2383 | if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { | |
2384 | kvm_inject_arm_sea(c); | |
2385 | } else { | |
2386 | error_report("failed to record the error"); | |
2387 | abort(); | |
2388 | } | |
2389 | } | |
2390 | return; | |
2391 | } | |
2392 | if (code == BUS_MCEERR_AO) { | |
2393 | error_report("Hardware memory error at addr %p for memory used by " | |
2394 | "QEMU itself instead of guest system!", addr); | |
2395 | } | |
2396 | } | |
2397 | ||
2398 | if (code == BUS_MCEERR_AR) { | |
2399 | error_report("Hardware memory error!"); | |
2400 | exit(1); | |
2401 | } | |
2402 | } | |
2403 | ||
2404 | /* C6.6.29 BRK instruction */ | |
2405 | static const uint32_t brk_insn = 0xd4200000; | |
2406 | ||
2407 | int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) | |
2408 | { | |
2409 | if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || | |
2410 | cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { | |
2411 | return -EINVAL; | |
2412 | } | |
2413 | return 0; | |
2414 | } | |
2415 | ||
2416 | int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) | |
2417 | { | |
2418 | static uint32_t brk; | |
2419 | ||
2420 | if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || | |
2421 | brk != brk_insn || | |
2422 | cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { | |
2423 | return -EINVAL; | |
2424 | } | |
2425 | return 0; | |
2426 | } |