git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/arm64/kvm/arm.c
KVM: arm64: Commit pending PC adjustments before returning to userspace
d94d71cb 1// SPDX-License-Identifier: GPL-2.0-only
749cf76c
CD
2/*
3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
749cf76c
CD
5 */
6
85acda3b 7#include <linux/bug.h>
1fcf7ce0 8#include <linux/cpu_pm.h>
749cf76c
CD
9#include <linux/errno.h>
10#include <linux/err.h>
11#include <linux/kvm_host.h>
1085fdc6 12#include <linux/list.h>
749cf76c
CD
13#include <linux/module.h>
14#include <linux/vmalloc.h>
15#include <linux/fs.h>
16#include <linux/mman.h>
17#include <linux/sched.h>
86ce8535 18#include <linux/kvm.h>
2412405b
EA
19#include <linux/kvm_irqfd.h>
20#include <linux/irqbypass.h>
de737089 21#include <linux/sched/stat.h>
eeeee719 22#include <linux/psci.h>
749cf76c
CD
23#include <trace/events/kvm.h>
24
25#define CREATE_TRACE_POINTS
9ed24f4b 26#include "trace_arm.h"
749cf76c 27
7c0f6ba6 28#include <linux/uaccess.h>
749cf76c
CD
29#include <asm/ptrace.h>
30#include <asm/mman.h>
342cd0ab 31#include <asm/tlbflush.h>
5b3e5e5b 32#include <asm/cacheflush.h>
85acda3b 33#include <asm/cpufeature.h>
342cd0ab
CD
34#include <asm/virt.h>
35#include <asm/kvm_arm.h>
36#include <asm/kvm_asm.h>
37#include <asm/kvm_mmu.h>
f7ed45be 38#include <asm/kvm_emulate.h>
910917bb 39#include <asm/sections.h>
749cf76c 40
8564d637
SP
41#include <kvm/arm_hypercalls.h>
42#include <kvm/arm_pmu.h>
43#include <kvm/arm_psci.h>
44
749cf76c
CD
45#ifdef REQUIRES_VIRT
46__asm__(".arch_extension virt");
47#endif
48
d8b369c4 49static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
f19f6644 50DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
d8b369c4 51
14ef9d04
MZ
52DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
53
342cd0ab 54static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
30c95391 55unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
63fec243 56DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
342cd0ab 57
f7ed45be
CD
58/* The VMID used in the VTTBR */
59static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
20475f78 60static u32 kvm_next_vmid;
fb544d1c 61static DEFINE_SPINLOCK(kvm_vmid_lock);
342cd0ab 62
c7da6fa4
PF
63static bool vgic_present;
64
67f69197 65static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
61bbe380
CD
66DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
67
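/*
 * A kick (IPI) is only needed if the vcpu is currently running in guest
 * mode; otherwise it will notice any pending requests before the next
 * guest entry anyway.
 */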
749cf76c
CD
68int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
69{
70 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
71}
72
b9904085 73int kvm_arch_hardware_setup(void *opaque)
749cf76c
CD
74{
75 return 0;
76}
77
b9904085 78int kvm_arch_check_processor_compat(void *opaque)
749cf76c 79{
f257d6dc 80 return 0;
749cf76c
CD
81}
82
c726200d
CD
83int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
84 struct kvm_enable_cap *cap)
85{
86 int r;
87
88 if (cap->flags)
89 return -EINVAL;
90
91 switch (cap->cap) {
92 case KVM_CAP_ARM_NISV_TO_USER:
93 r = 0;
94 kvm->arch.return_nisv_io_abort_to_user = true;
95 break;
96 default:
97 r = -EINVAL;
98 break;
99 }
100
101 return r;
102}
749cf76c 103
5107000f
MZ
104static int kvm_arm_default_max_vcpus(void)
105{
106 return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
107}
108
4f1df628 109static void set_default_spectre(struct kvm *kvm)
23711a5e
MZ
110{
111 /*
112 * The default is to expose CSV2 == 1 if the HW isn't affected.
113 * Although this is a per-CPU feature, we make it global because
114 * asymmetric systems are just a nuisance.
115 *
116 * Userspace can override this as long as it doesn't promise
117 * the impossible.
118 */
119 if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
120 kvm->arch.pfr0_csv2 = 1;
4f1df628
MZ
121 if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
122 kvm->arch.pfr0_csv3 = 1;
23711a5e
MZ
123}
124
d5d8184d
CD
125/**
126 * kvm_arch_init_vm - initializes a VM data structure
127 * @kvm: pointer to the KVM struct
128 */
749cf76c
CD
129int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
130{
a0e50aa3 131 int ret;
d5d8184d 132
bca607eb 133 ret = kvm_arm_setup_stage2(kvm, type);
5b6c6742
SP
134 if (ret)
135 return ret;
749cf76c 136
a0e50aa3 137 ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
d5d8184d 138 if (ret)
a0e50aa3 139 return ret;
d5d8184d 140
c8dddecd 141 ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
d5d8184d
CD
142 if (ret)
143 goto out_free_stage2_pgd;
144
6c3d63c9 145 kvm_vgic_early_init(kvm);
a1a64387 146
3caa2d8c 147 /* The maximum number of VCPUs is limited by the host's GIC model */
5107000f 148 kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
3caa2d8c 149
4f1df628 150 set_default_spectre(kvm);
23711a5e 151
d5d8184d
CD
152 return ret;
153out_free_stage2_pgd:
a0e50aa3 154 kvm_free_stage2_pgd(&kvm->arch.mmu);
d5d8184d 155 return ret;
749cf76c
CD
156}
157
1499fa80 158vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
749cf76c
CD
159{
160 return VM_FAULT_SIGBUS;
161}
162
749cf76c 163
d5d8184d
CD
164/**
165 * kvm_arch_destroy_vm - destroy the VM data structure
166 * @kvm: pointer to the KVM struct
167 */
749cf76c
CD
168void kvm_arch_destroy_vm(struct kvm *kvm)
169{
170 int i;
171
d7eec236
MZ
172 bitmap_free(kvm->arch.pmu_filter);
173
b2c9a85d
MZ
174 kvm_vgic_destroy(kvm);
175
749cf76c
CD
176 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
177 if (kvm->vcpus[i]) {
4543bdc0 178 kvm_vcpu_destroy(kvm->vcpus[i]);
749cf76c
CD
179 kvm->vcpus[i] = NULL;
180 }
181 }
6b2ad81b 182 atomic_set(&kvm->online_vcpus, 0);
749cf76c
CD
183}
184
784aa3d7 185int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
749cf76c
CD
186{
187 int r;
188 switch (ext) {
1a89dd91 189 case KVM_CAP_IRQCHIP:
c7da6fa4
PF
190 r = vgic_present;
191 break;
d44758c0 192 case KVM_CAP_IOEVENTFD:
7330672b 193 case KVM_CAP_DEVICE_CTRL:
749cf76c
CD
194 case KVM_CAP_USER_MEMORY:
195 case KVM_CAP_SYNC_MMU:
196 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
197 case KVM_CAP_ONE_REG:
aa024c2f 198 case KVM_CAP_ARM_PSCI:
4447a208 199 case KVM_CAP_ARM_PSCI_0_2:
98047888 200 case KVM_CAP_READONLY_MEM:
ecccf0cc 201 case KVM_CAP_MP_STATE:
460df4c1 202 case KVM_CAP_IMMEDIATE_EXIT:
58bf437f 203 case KVM_CAP_VCPU_EVENTS:
92f35b75 204 case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
c726200d 205 case KVM_CAP_ARM_NISV_TO_USER:
da345174 206 case KVM_CAP_ARM_INJECT_EXT_DABT:
36fb4cd5
WD
207 case KVM_CAP_SET_GUEST_DEBUG:
208 case KVM_CAP_VCPU_ATTRIBUTES:
749cf76c
CD
209 r = 1;
210 break;
3401d546
CD
211 case KVM_CAP_ARM_SET_DEVICE_ADDR:
212 r = 1;
ca46e10f 213 break;
749cf76c
CD
214 case KVM_CAP_NR_VCPUS:
215 r = num_online_cpus();
216 break;
217 case KVM_CAP_MAX_VCPUS:
a86cb413 218 case KVM_CAP_MAX_VCPU_ID:
5107000f
MZ
219 if (kvm)
220 r = kvm->arch.max_vcpus;
221 else
222 r = kvm_arm_default_max_vcpus();
a86cb413 223 break;
2988509d
VM
224 case KVM_CAP_MSI_DEVID:
225 if (!kvm)
226 r = -EINVAL;
227 else
228 r = kvm->arch.vgic.msis_require_devid;
229 break;
f7214e60
CD
230 case KVM_CAP_ARM_USER_IRQ:
231 /*
232 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
233 * (bump this number if adding more devices)
234 */
235 r = 1;
236 break;
004a0124
AJ
237 case KVM_CAP_STEAL_TIME:
238 r = kvm_arm_pvtime_supported();
239 break;
36fb4cd5
WD
240 case KVM_CAP_ARM_EL1_32BIT:
241 r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
242 break;
243 case KVM_CAP_GUEST_DEBUG_HW_BPS:
244 r = get_num_brps();
245 break;
246 case KVM_CAP_GUEST_DEBUG_HW_WPS:
247 r = get_num_wrps();
248 break;
249 case KVM_CAP_ARM_PMU_V3:
250 r = kvm_arm_support_pmu_v3();
251 break;
252 case KVM_CAP_ARM_INJECT_SERROR_ESR:
253 r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
254 break;
255 case KVM_CAP_ARM_VM_IPA_SIZE:
256 r = get_kvm_ipa_limit();
749cf76c 257 break;
36fb4cd5
WD
258 case KVM_CAP_ARM_SVE:
259 r = system_supports_sve();
260 break;
261 case KVM_CAP_ARM_PTRAUTH_ADDRESS:
262 case KVM_CAP_ARM_PTRAUTH_GENERIC:
263 r = system_has_full_ptr_auth();
264 break;
265 default:
266 r = 0;
749cf76c 267 }
36fb4cd5 268
749cf76c
CD
269 return r;
270}
271
272long kvm_arch_dev_ioctl(struct file *filp,
273 unsigned int ioctl, unsigned long arg)
274{
275 return -EINVAL;
276}
277
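/*
 * Note: !VHE hosts allocate struct kvm with kzalloc() so that it lives in
 * the kernel linear map, which the hyp mapping of the structure relies on;
 * VHE has no separate hyp mappings and can use vmalloc() instead.
 */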
d1e5b0e9
MO
278struct kvm *kvm_arch_alloc_vm(void)
279{
280 if (!has_vhe())
281 return kzalloc(sizeof(struct kvm), GFP_KERNEL);
282
283 return vzalloc(sizeof(struct kvm));
284}
285
286void kvm_arch_free_vm(struct kvm *kvm)
287{
288 if (!has_vhe())
289 kfree(kvm);
290 else
291 vfree(kvm);
292}
749cf76c 293
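/*
 * A vcpu cannot be added once the in-kernel irqchip has been initialized,
 * and its id must stay below the limit imposed by the host's GIC model.
 */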
897cc38e
SC
294int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
295{
296 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
297 return -EBUSY;
298
299 if (id >= kvm->arch.max_vcpus)
300 return -EINVAL;
301
302 return 0;
303}
304
e529ef66 305int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
749cf76c 306{
39a93a87
SC
307 int err;
308
309 /* Force users to call KVM_ARM_VCPU_INIT */
310 vcpu->arch.target = -1;
311 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
312
e539451b
SC
313 vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
314
39a93a87
SC
315 /* Set up the timer */
316 kvm_timer_vcpu_init(vcpu);
317
318 kvm_pmu_vcpu_init(vcpu);
319
320 kvm_arm_reset_debug_ptr(vcpu);
321
322 kvm_arm_pvtime_vcpu_init(&vcpu->arch);
323
a0e50aa3
CD
324 vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
325
39a93a87
SC
326 err = kvm_vgic_vcpu_init(vcpu);
327 if (err)
328 return err;
329
e529ef66 330 return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
749cf76c
CD
331}
332
31928aa5 333void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
749cf76c 334{
749cf76c
CD
335}
336
4b8fff78 337void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
749cf76c 338{
f1d7231c
CD
339 if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
340 static_branch_dec(&userspace_irqchip_in_use);
341
9af3e08b 342 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
967f8427 343 kvm_timer_vcpu_terminate(vcpu);
5f0a714a 344 kvm_pmu_vcpu_destroy(vcpu);
19bcc89e
SC
345
346 kvm_arm_vcpu_destroy(vcpu);
749cf76c
CD
347}
348
749cf76c
CD
349int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
350{
1c88ab7e 351 return kvm_timer_is_pending(vcpu);
749cf76c
CD
352}
353
d35268da
CD
354void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
355{
5eeaf10e
MZ
356 /*
357 * If we're about to block (most likely because we've just hit a
358 * WFI), we need to sync back the state of the GIC CPU interface
8e01d9a3 359 * so that we have the latest PMR and group enables. This ensures
5eeaf10e
MZ
360 * that kvm_arch_vcpu_runnable has up-to-date data to decide
361 * whether we have pending interrupts.
8e01d9a3
MZ
362 *
363 * For the same reason, we want to tell GICv4 that we need
364 * doorbells to be signalled, should an interrupt become pending.
5eeaf10e
MZ
365 */
366 preempt_disable();
367 kvm_vgic_vmcr_sync(vcpu);
8e01d9a3 368 vgic_v4_put(vcpu, true);
5eeaf10e 369 preempt_enable();
d35268da
CD
370}
371
372void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
373{
8e01d9a3
MZ
374 preempt_disable();
375 vgic_v4_load(vcpu);
376 preempt_enable();
d35268da
CD
377}
378
749cf76c
CD
379void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
380{
a0e50aa3 381 struct kvm_s2_mmu *mmu;
94d0e598
MZ
382 int *last_ran;
383
a0e50aa3
CD
384 mmu = vcpu->arch.hw_mmu;
385 last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
94d0e598
MZ
386
387 /*
d06721ff
MZ
388 * We guarantee that both TLBs and I-cache are private to each
389 * vcpu. If detecting that a vcpu from the same VM has
390 * previously run on the same physical CPU, call into the
391 * hypervisor code to nuke the relevant contexts.
392 *
94d0e598
MZ
393 * We might get preempted before the vCPU actually runs, but
394 * over-invalidation doesn't affect correctness.
395 */
396 if (*last_ran != vcpu->vcpu_id) {
d06721ff 397 kvm_call_hyp(__kvm_flush_cpu_context, mmu);
94d0e598
MZ
398 *last_ran = vcpu->vcpu_id;
399 }
400
86ce8535 401 vcpu->cpu = cpu;
5b3e5e5b 402
328e5664 403 kvm_vgic_load(vcpu);
b103cc3f 404 kvm_timer_vcpu_load(vcpu);
13aeb9b4
DB
405 if (has_vhe())
406 kvm_vcpu_load_sysregs_vhe(vcpu);
e6b673b7 407 kvm_arch_vcpu_load_fp(vcpu);
435e53fb 408 kvm_vcpu_pmu_restore_guest(vcpu);
8564d637
SP
409 if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
410 kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
de737089
MZ
411
412 if (single_task_running())
ef2e78dd 413 vcpu_clear_wfx_traps(vcpu);
de737089 414 else
ef2e78dd 415 vcpu_set_wfx_traps(vcpu);
384b40ca 416
29eb5a3c 417 if (vcpu_has_ptrauth(vcpu))
ef3e40a7 418 vcpu_ptrauth_disable(vcpu);
749cf76c
CD
419}
420
421void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
422{
e6b673b7 423 kvm_arch_vcpu_put_fp(vcpu);
13aeb9b4
DB
424 if (has_vhe())
425 kvm_vcpu_put_sysregs_vhe(vcpu);
b103cc3f 426 kvm_timer_vcpu_put(vcpu);
328e5664 427 kvm_vgic_put(vcpu);
435e53fb 428 kvm_vcpu_pmu_restore_host(vcpu);
328e5664 429
e9b152cb 430 vcpu->cpu = -1;
749cf76c
CD
431}
432
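/*
 * Mark the vcpu as powered off and kick it out of the guest so that it
 * goes to sleep in the run loop (KVM_REQ_SLEEP).
 */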
424c989b
AJ
433static void vcpu_power_off(struct kvm_vcpu *vcpu)
434{
435 vcpu->arch.power_off = true;
7b244e2b 436 kvm_make_request(KVM_REQ_SLEEP, vcpu);
424c989b
AJ
437 kvm_vcpu_kick(vcpu);
438}
439
749cf76c
CD
440int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
441 struct kvm_mp_state *mp_state)
442{
3781528e 443 if (vcpu->arch.power_off)
ecccf0cc
AB
444 mp_state->mp_state = KVM_MP_STATE_STOPPED;
445 else
446 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
447
448 return 0;
749cf76c
CD
449}
450
451int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
452 struct kvm_mp_state *mp_state)
453{
e83dff5e
CD
454 int ret = 0;
455
ecccf0cc
AB
456 switch (mp_state->mp_state) {
457 case KVM_MP_STATE_RUNNABLE:
3781528e 458 vcpu->arch.power_off = false;
ecccf0cc
AB
459 break;
460 case KVM_MP_STATE_STOPPED:
424c989b 461 vcpu_power_off(vcpu);
ecccf0cc
AB
462 break;
463 default:
e83dff5e 464 ret = -EINVAL;
ecccf0cc
AB
465 }
466
e83dff5e 467 return ret;
749cf76c
CD
468}
469
5b3e5e5b
CD
470/**
471 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
472 * @v: The VCPU pointer
473 *
474 * If the guest CPU is not waiting for interrupts or an interrupt line is
475 * asserted, the CPU is by definition runnable.
476 */
749cf76c
CD
477int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
478{
3df59d8d
CD
479 bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
480 return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
3b92830a 481 && !v->arch.power_off && !v->arch.pause);
749cf76c
CD
482}
483
199b5763
LM
484bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
485{
f01fbd2f 486 return vcpu_mode_priv(vcpu);
199b5763
LM
487}
488
f7ed45be
CD
489/* Just ensure a guest exit from a particular CPU */
490static void exit_vm_noop(void *info)
491{
492}
493
494void force_vm_exit(const cpumask_t *mask)
495{
898f949f 496 preempt_disable();
f7ed45be 497 smp_call_function_many(mask, exit_vm_noop, NULL, true);
898f949f 498 preempt_enable();
f7ed45be
CD
499}
500
501/**
502 * need_new_vmid_gen - check that the VMID is still valid
e329fb75 503 * @vmid: The VMID to check
f7ed45be
CD
504 *
505 * return true if there is a new generation of VMIDs being used
506 *
e329fb75
CD
507 * The hardware supports a limited set of values with the value zero reserved
508 * for the host, so we check if an assigned value belongs to a previous
656012c7
FT
509 * generation, which requires us to assign a new value. If we're the first to
510 * use a VMID for the new generation, we must flush necessary caches and TLBs
511 * on all CPUs.
f7ed45be 512 */
e329fb75 513static bool need_new_vmid_gen(struct kvm_vmid *vmid)
f7ed45be 514{
fb544d1c
CD
515 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
516 smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
e329fb75 517 return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
f7ed45be
CD
518}
519
520/**
e329fb75 521 * update_vmid - Update the vmid with a valid VMID for the current generation
e329fb75 522 * @vmid: The stage-2 VMID information struct
f7ed45be 523 */
e329fb75 524static void update_vmid(struct kvm_vmid *vmid)
f7ed45be 525{
e329fb75 526 if (!need_new_vmid_gen(vmid))
f7ed45be
CD
527 return;
528
fb544d1c 529 spin_lock(&kvm_vmid_lock);
f7ed45be
CD
530
531 /*
532 * We need to re-check the vmid_gen here to ensure that if another vcpu
533 * already allocated a valid vmid for this vm, then this vcpu should
534 * use the same vmid.
535 */
e329fb75 536 if (!need_new_vmid_gen(vmid)) {
fb544d1c 537 spin_unlock(&kvm_vmid_lock);
f7ed45be
CD
538 return;
539 }
540
541 /* First user of a new VMID generation? */
542 if (unlikely(kvm_next_vmid == 0)) {
543 atomic64_inc(&kvm_vmid_gen);
544 kvm_next_vmid = 1;
545
546 /*
547 * On SMP we know no other CPUs can use this CPU's or each
548 * other's VMID after force_vm_exit returns since the
549 * kvm_vmid_lock blocks them from reentry to the guest.
550 */
551 force_vm_exit(cpu_all_mask);
552 /*
553 * Now broadcast TLB + ICACHE invalidation over the inner
554 * shareable domain to make sure all data structures are
555 * clean.
556 */
557 kvm_call_hyp(__kvm_flush_vm_context);
558 }
559
e329fb75 560 vmid->vmid = kvm_next_vmid;
f7ed45be 561 kvm_next_vmid++;
e329fb75 562 kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
f7ed45be 563
fb544d1c 564 smp_wmb();
e329fb75 565 WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
fb544d1c
CD
566
567 spin_unlock(&kvm_vmid_lock);
f7ed45be
CD
568}
569
f7ed45be
CD
570static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
571{
05971120 572 struct kvm *kvm = vcpu->kvm;
41a54482 573 int ret = 0;
e1ba0207 574
f7ed45be
CD
575 if (likely(vcpu->arch.has_run_once))
576 return 0;
577
7dd32a0d
DM
578 if (!kvm_arm_vcpu_is_finalized(vcpu))
579 return -EPERM;
580
f7ed45be 581 vcpu->arch.has_run_once = true;
aa024c2f 582
19a6365d
AE
583 kvm_arm_vcpu_init_debug(vcpu);
584
61bbe380
CD
585 if (likely(irqchip_in_kernel(kvm))) {
586 /*
587 * Map the VGIC hardware resources before running a vcpu the
588 * first time on this VM.
589 */
1c91f06d
AE
590 ret = kvm_vgic_map_resources(kvm);
591 if (ret)
592 return ret;
61bbe380
CD
593 } else {
594 /*
595 * Tell the rest of the code that there are userspace irqchip
596 * VMs in the wild.
597 */
598 static_branch_inc(&userspace_irqchip_in_use);
01ac5e34
MZ
599 }
600
d9e13977 601 ret = kvm_timer_enable(vcpu);
a2befacf
CD
602 if (ret)
603 return ret;
604
605 ret = kvm_arm_pmu_v3_enable(vcpu);
05971120 606
41a54482 607 return ret;
f7ed45be
CD
608}
609
c1426e4c
EA
610bool kvm_arch_intc_initialized(struct kvm *kvm)
611{
612 return vgic_initialized(kvm);
613}
614
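/*
 * Pause all vcpus of a VM: set the pause flag and force them out of the
 * guest; they will sleep in vcpu_req_sleep() until kvm_arm_resume_guest().
 */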
b13216cf 615void kvm_arm_halt_guest(struct kvm *kvm)
3b92830a
EA
616{
617 int i;
618 struct kvm_vcpu *vcpu;
619
620 kvm_for_each_vcpu(i, vcpu, kvm)
621 vcpu->arch.pause = true;
7b244e2b 622 kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
3b92830a
EA
623}
624
b13216cf 625void kvm_arm_resume_guest(struct kvm *kvm)
3b92830a
EA
626{
627 int i;
628 struct kvm_vcpu *vcpu;
629
abd72296
CD
630 kvm_for_each_vcpu(i, vcpu, kvm) {
631 vcpu->arch.pause = false;
da4ad88c 632 rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
abd72296 633 }
3b92830a
EA
634}
635
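/*
 * Put the vcpu to sleep until it is neither powered off nor paused. If we
 * are woken (e.g. by a signal) while still off/paused, re-arm
 * KVM_REQ_SLEEP so that we go back to sleep on the next request check.
 */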
7b244e2b 636static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
aa024c2f 637{
da4ad88c 638 struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
aa024c2f 639
da4ad88c
DB
640 rcuwait_wait_event(wait,
641 (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
642 TASK_INTERRUPTIBLE);
0592c005 643
424c989b 644 if (vcpu->arch.power_off || vcpu->arch.pause) {
0592c005 645 /* Awaken to handle a signal, request we sleep again later. */
7b244e2b 646 kvm_make_request(KVM_REQ_SLEEP, vcpu);
0592c005 647 }
358b28f0
MZ
648
649 /*
650 * Make sure we will observe a potential reset request if we've
651 * observed a change to the power state. Pairs with the smp_wmb() in
652 * kvm_psci_vcpu_on().
653 */
654 smp_rmb();
aa024c2f
MZ
655}
656
e8180dca
AP
657static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
658{
659 return vcpu->arch.target >= 0;
660}
661
0592c005
AJ
662static void check_vcpu_requests(struct kvm_vcpu *vcpu)
663{
664 if (kvm_request_pending(vcpu)) {
7b244e2b
AJ
665 if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
666 vcpu_req_sleep(vcpu);
325f9c64 667
358b28f0
MZ
668 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
669 kvm_reset_vcpu(vcpu);
670
325f9c64
AJ
671 /*
672 * Clear IRQ_PENDING requests that were made to guarantee
673 * that a VCPU sees new virtual interrupts.
674 */
675 kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
8564d637
SP
676
677 if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
678 kvm_update_stolen_time(vcpu);
d9c3872c
MZ
679
680 if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
681 /* The distributor enable bits were changed */
682 preempt_disable();
683 vgic_v4_put(vcpu, false);
684 vgic_v4_load(vcpu);
685 preempt_enable();
686 }
0592c005
AJ
687 }
688}
689
f7ed45be
CD
690/**
691 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
692 * @vcpu: The VCPU pointer
f7ed45be
CD
693 *
694 * This function is invoked from user space via the KVM_RUN ioctl. It
695 * executes guest code in a loop until the process' time slice expires or
696 * until some emulation is needed from user space, in which case it returns
697 * 0 with the kvm_run structure filled in with the data required for the
698 * requested emulation.
699 */
1b94f6f8 700int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
749cf76c 701{
1b94f6f8 702 struct kvm_run *run = vcpu->run;
f7ed45be 703 int ret;
f7ed45be 704
e8180dca 705 if (unlikely(!kvm_vcpu_initialized(vcpu)))
f7ed45be
CD
706 return -ENOEXEC;
707
708 ret = kvm_vcpu_first_run_init(vcpu);
709 if (ret)
829a5863 710 return ret;
f7ed45be 711
45e96ea6 712 if (run->exit_reason == KVM_EXIT_MMIO) {
74cc7e0c 713 ret = kvm_handle_mmio_return(vcpu);
45e96ea6 714 if (ret)
829a5863 715 return ret;
45e96ea6
CD
716 }
717
829a5863
CD
718 if (run->immediate_exit)
719 return -EINTR;
720
721 vcpu_load(vcpu);
460df4c1 722
20b7035c 723 kvm_sigset_activate(vcpu);
f7ed45be
CD
724
725 ret = 1;
726 run->exit_reason = KVM_EXIT_UNKNOWN;
727 while (ret > 0) {
728 /*
729 * Check conditions before entering the guest
730 */
731 cond_resched();
732
a0e50aa3 733 update_vmid(&vcpu->arch.hw_mmu->vmid);
f7ed45be 734
0592c005
AJ
735 check_vcpu_requests(vcpu);
736
abdf5843
MZ
737 /*
738 * Preparing the interrupts to be injected also
739 * involves poking the GIC, which must be done in a
740 * non-preemptible context.
741 */
1b3d546d 742 preempt_disable();
328e5664 743
b02386eb 744 kvm_pmu_flush_hwstate(vcpu);
328e5664 745
f7ed45be
CD
746 local_irq_disable();
747
abdf5843
MZ
748 kvm_vgic_flush_hwstate(vcpu);
749
f7ed45be 750 /*
61bbe380
CD
751 * Exit if we have a signal pending so that we can deliver the
752 * signal to user space.
f7ed45be 753 */
61bbe380 754 if (signal_pending(current)) {
f7ed45be
CD
755 ret = -EINTR;
756 run->exit_reason = KVM_EXIT_INTR;
757 }
758
61bbe380
CD
759 /*
760 * If we're using a userspace irqchip, then check if we need
761 * to tell a userspace irqchip about timer or PMU level
762 * changes and if so, exit to userspace (the actual level
763 * state gets updated in kvm_timer_update_run and
764 * kvm_pmu_update_run below).
765 */
766 if (static_branch_unlikely(&userspace_irqchip_in_use)) {
767 if (kvm_timer_should_notify_user(vcpu) ||
768 kvm_pmu_should_notify_user(vcpu)) {
769 ret = -EINTR;
770 run->exit_reason = KVM_EXIT_INTR;
771 }
772 }
773
6a6d73be
AJ
774 /*
775 * Ensure we set mode to IN_GUEST_MODE after we disable
776 * interrupts and before the final VCPU requests check.
777 * See the comment in kvm_vcpu_exiting_guest_mode() and
2f5947df 778 * Documentation/virt/kvm/vcpu-requests.rst
6a6d73be
AJ
779 */
780 smp_store_mb(vcpu->mode, IN_GUEST_MODE);
781
a0e50aa3 782 if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
424c989b 783 kvm_request_pending(vcpu)) {
6a6d73be 784 vcpu->mode = OUTSIDE_GUEST_MODE;
771621b0 785 isb(); /* Ensure work in x_flush_hwstate is committed */
b02386eb 786 kvm_pmu_sync_hwstate(vcpu);
61bbe380 787 if (static_branch_unlikely(&userspace_irqchip_in_use))
3c5ff0c6 788 kvm_timer_sync_user(vcpu);
1a89dd91 789 kvm_vgic_sync_hwstate(vcpu);
ee9bb9a1 790 local_irq_enable();
abdf5843 791 preempt_enable();
f7ed45be
CD
792 continue;
793 }
794
56c7f5e7
AB
795 kvm_arm_setup_debug(vcpu);
796
f7ed45be
CD
797 /**************************************************************
798 * Enter the guest
799 */
800 trace_kvm_entry(*vcpu_pc(vcpu));
6edaa530 801 guest_enter_irqoff();
f7ed45be 802
09cf57eb 803 ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
3f5c90b8 804
f7ed45be 805 vcpu->mode = OUTSIDE_GUEST_MODE;
b19e6892 806 vcpu->stat.exits++;
1b3d546d
CD
807 /*
808 * Back from guest
809 *************************************************************/
810
56c7f5e7
AB
811 kvm_arm_clear_debug(vcpu);
812
ee9bb9a1 813 /*
b103cc3f 814 * We must sync the PMU state before the vgic state so
ee9bb9a1
CD
815 * that the vgic can properly sample the updated state of the
816 * interrupt line.
817 */
818 kvm_pmu_sync_hwstate(vcpu);
ee9bb9a1 819
b103cc3f
CD
820 /*
821 * Sync the vgic state before syncing the timer state because
822 * the timer code needs to know if the virtual timer
823 * interrupts are active.
824 */
ee9bb9a1
CD
825 kvm_vgic_sync_hwstate(vcpu);
826
b103cc3f
CD
827 /*
828 * Sync the timer hardware state before enabling interrupts as
829 * we don't want vtimer interrupts to race with syncing the
830 * timer virtual interrupt state.
831 */
61bbe380 832 if (static_branch_unlikely(&userspace_irqchip_in_use))
3c5ff0c6 833 kvm_timer_sync_user(vcpu);
b103cc3f 834
e6b673b7
DM
835 kvm_arch_vcpu_ctxsync_fp(vcpu);
836
f7ed45be
CD
837 /*
838 * We may have taken a host interrupt in HYP mode (ie
839 * while executing the guest). This interrupt is still
840 * pending, as we haven't serviced it yet!
841 *
842 * We're now back in SVC mode, with interrupts
843 * disabled. Enabling the interrupts now will have
844 * the effect of taking the interrupt again, in SVC
845 * mode this time.
846 */
847 local_irq_enable();
848
849 /*
6edaa530 850 * We do local_irq_enable() before calling guest_exit() so
1b3d546d
CD
851 * that if a timer interrupt hits while running the guest we
852 * account that tick as being spent in the guest. We enable
6edaa530 853 * preemption after calling guest_exit() so that if we get
1b3d546d
CD
854 * preempted we make sure ticks after that are not counted as
855 * guest time.
856 */
6edaa530 857 guest_exit();
b5905dc1 858 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
1b3d546d 859
3368bd80 860 /* Exit types that need handling before we can be preempted */
74cc7e0c 861 handle_exit_early(vcpu, ret);
3368bd80 862
abdf5843
MZ
863 preempt_enable();
864
22f55384
QY
865 /*
866 * The ARMv8 architecture doesn't give the hypervisor
867 * a mechanism to prevent a guest from dropping to AArch32 EL0
868 * if implemented by the CPU. If we spot the guest in such a
869 * state and decide it wasn't supposed to do so (like
870 * with the asymmetric AArch32 case), return to userspace with
871 * a fatal error.
872 */
873 if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) {
874 /*
875 * As we have caught the guest red-handed, decide that
876 * it isn't fit for purpose anymore by making the vcpu
877 * invalid. The VMM can try and fix it by issuing a
878 * KVM_ARM_VCPU_INIT if it really wants to.
879 */
880 vcpu->arch.target = -1;
881 ret = ARM_EXCEPTION_IL;
882 }
883
74cc7e0c 884 ret = handle_exit(vcpu, ret);
f7ed45be
CD
885 }
886
d9e13977 887 /* Tell userspace about in-kernel device output levels */
3dbbdf78
CD
888 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
889 kvm_timer_update_run(vcpu);
890 kvm_pmu_update_run(vcpu);
891 }
d9e13977 892
20b7035c
JS
893 kvm_sigset_deactivate(vcpu);
894
36ed0462
MZ
895 /*
896 * In the unlikely event that we are returning to userspace
897 * with pending exceptions or PC adjustment, commit these
898 * adjustments in order to give userspace a consistent view of
899 * the vcpu state. Note that this relies on __kvm_adjust_pc()
900 * being preempt-safe on VHE.
901 */
902 if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
903 KVM_ARM64_INCREMENT_PC)))
904 kvm_call_hyp(__kvm_adjust_pc, vcpu);
905
accb757d 906 vcpu_put(vcpu);
f7ed45be 907 return ret;
749cf76c
CD
908}
909
86ce8535
CD
910static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
911{
912 int bit_index;
913 bool set;
3df59d8d 914 unsigned long *hcr;
86ce8535
CD
915
916 if (number == KVM_ARM_IRQ_CPU_IRQ)
917 bit_index = __ffs(HCR_VI);
918 else /* KVM_ARM_IRQ_CPU_FIQ */
919 bit_index = __ffs(HCR_VF);
920
3df59d8d 921 hcr = vcpu_hcr(vcpu);
86ce8535 922 if (level)
3df59d8d 923 set = test_and_set_bit(bit_index, hcr);
86ce8535 924 else
3df59d8d 925 set = test_and_clear_bit(bit_index, hcr);
86ce8535
CD
926
927 /*
928 * If we didn't change anything, no need to wake up or kick other CPUs
929 */
930 if (set == level)
931 return 0;
932
933 /*
934 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
935 * trigger a world-switch round on the running physical CPU to set the
936 * virtual IRQ/FIQ fields in the HCR appropriately.
937 */
325f9c64 938 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
86ce8535
CD
939 kvm_vcpu_kick(vcpu);
940
941 return 0;
942}
943
79558f11
AG
944int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
945 bool line_status)
86ce8535
CD
946{
947 u32 irq = irq_level->irq;
948 unsigned int irq_type, vcpu_idx, irq_num;
949 int nrcpus = atomic_read(&kvm->online_vcpus);
950 struct kvm_vcpu *vcpu = NULL;
951 bool level = irq_level->level;
952
953 irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
954 vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
92f35b75 955 vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
86ce8535
CD
956 irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
957
958 trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
959
5863c2ce
MZ
960 switch (irq_type) {
961 case KVM_ARM_IRQ_TYPE_CPU:
962 if (irqchip_in_kernel(kvm))
963 return -ENXIO;
86ce8535 964
5863c2ce
MZ
965 if (vcpu_idx >= nrcpus)
966 return -EINVAL;
86ce8535 967
5863c2ce
MZ
968 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
969 if (!vcpu)
970 return -EINVAL;
86ce8535 971
5863c2ce
MZ
972 if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
973 return -EINVAL;
974
975 return vcpu_interrupt_line(vcpu, irq_num, level);
976 case KVM_ARM_IRQ_TYPE_PPI:
977 if (!irqchip_in_kernel(kvm))
978 return -ENXIO;
979
980 if (vcpu_idx >= nrcpus)
981 return -EINVAL;
982
983 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
984 if (!vcpu)
985 return -EINVAL;
986
987 if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
988 return -EINVAL;
86ce8535 989
cb3f0ad8 990 return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
5863c2ce
MZ
991 case KVM_ARM_IRQ_TYPE_SPI:
992 if (!irqchip_in_kernel(kvm))
993 return -ENXIO;
994
fd1d0ddf 995 if (irq_num < VGIC_NR_PRIVATE_IRQS)
5863c2ce
MZ
996 return -EINVAL;
997
cb3f0ad8 998 return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
5863c2ce
MZ
999 }
1000
1001 return -EINVAL;
86ce8535
CD
1002}
1003
f7fa034d
CD
1004static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
1005 const struct kvm_vcpu_init *init)
1006{
811328fc 1007 unsigned int i, ret;
f7fa034d
CD
1008 int phys_target = kvm_target_cpu();
1009
1010 if (init->target != phys_target)
1011 return -EINVAL;
1012
1013 /*
1014 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
1015 * use the same target.
1016 */
1017 if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
1018 return -EINVAL;
1019
1020 /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
1021 for (i = 0; i < sizeof(init->features) * 8; i++) {
1022 bool set = (init->features[i / 32] & (1 << (i % 32)));
1023
1024 if (set && i >= KVM_VCPU_MAX_FEATURES)
1025 return -ENOENT;
1026
1027 /*
1028 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
1029 * use the same feature set.
1030 */
1031 if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
1032 test_bit(i, vcpu->arch.features) != set)
1033 return -EINVAL;
1034
1035 if (set)
1036 set_bit(i, vcpu->arch.features);
1037 }
1038
1039 vcpu->arch.target = phys_target;
1040
1041 /* Now we know what it is, we can reset it. */
811328fc
AJ
1042 ret = kvm_reset_vcpu(vcpu);
1043 if (ret) {
1044 vcpu->arch.target = -1;
1045 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
1046 }
f7fa034d 1047
811328fc
AJ
1048 return ret;
1049}
f7fa034d 1050
478a8237
CD
1051static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
1052 struct kvm_vcpu_init *init)
1053{
1054 int ret;
1055
1056 ret = kvm_vcpu_set_target(vcpu, init);
1057 if (ret)
1058 return ret;
1059
957db105
CD
1060 /*
1061 * Ensure a rebooted VM will fault in RAM pages and detect if the
1062 * guest MMU is turned off and flush the caches as needed.
892713e9 1063 *
7ae2f3db
MZ
1064 * S2FWB forces all memory accesses to RAM to be cacheable,
1065 * ensuring that the data side is always coherent. We still
1066 * need to invalidate the I-cache though, as FWB does *not*
1067 * imply CTR_EL0.DIC.
957db105 1068 */
7ae2f3db
MZ
1069 if (vcpu->arch.has_run_once) {
1070 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
1071 stage2_unmap_vm(vcpu->kvm);
1072 else
1073 __flush_icache_all();
1074 }
957db105 1075
b856a591
CD
1076 vcpu_reset_hcr(vcpu);
1077
478a8237 1078 /*
3781528e 1079 * Handle the "start in power-off" case.
478a8237 1080 */
03f1d4c1 1081 if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
424c989b 1082 vcpu_power_off(vcpu);
3ad8b3de 1083 else
3781528e 1084 vcpu->arch.power_off = false;
478a8237
CD
1085
1086 return 0;
1087}
1088
f577f6c2
SZ
1089static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
1090 struct kvm_device_attr *attr)
1091{
1092 int ret = -ENXIO;
1093
1094 switch (attr->group) {
1095 default:
bb0c70bc 1096 ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
f577f6c2
SZ
1097 break;
1098 }
1099
1100 return ret;
1101}
1102
1103static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
1104 struct kvm_device_attr *attr)
1105{
1106 int ret = -ENXIO;
1107
1108 switch (attr->group) {
1109 default:
bb0c70bc 1110 ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
f577f6c2
SZ
1111 break;
1112 }
1113
1114 return ret;
1115}
1116
1117static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
1118 struct kvm_device_attr *attr)
1119{
1120 int ret = -ENXIO;
1121
1122 switch (attr->group) {
1123 default:
bb0c70bc 1124 ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
f577f6c2
SZ
1125 break;
1126 }
1127
1128 return ret;
1129}
1130
539aee0e
JM
1131static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
1132 struct kvm_vcpu_events *events)
1133{
1134 memset(events, 0, sizeof(*events));
1135
1136 return __kvm_arm_vcpu_get_events(vcpu, events);
1137}
1138
1139static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
1140 struct kvm_vcpu_events *events)
1141{
1142 int i;
1143
1144 /* check whether the reserved field is zero */
1145 for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
1146 if (events->reserved[i])
1147 return -EINVAL;
1148
1149 /* check whether the pad field is zero */
1150 for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
1151 if (events->exception.pad[i])
1152 return -EINVAL;
1153
1154 return __kvm_arm_vcpu_set_events(vcpu, events);
1155}
539aee0e 1156
749cf76c
CD
1157long kvm_arch_vcpu_ioctl(struct file *filp,
1158 unsigned int ioctl, unsigned long arg)
1159{
1160 struct kvm_vcpu *vcpu = filp->private_data;
1161 void __user *argp = (void __user *)arg;
f577f6c2 1162 struct kvm_device_attr attr;
9b062471
CD
1163 long r;
1164
749cf76c
CD
1165 switch (ioctl) {
1166 case KVM_ARM_VCPU_INIT: {
1167 struct kvm_vcpu_init init;
1168
9b062471 1169 r = -EFAULT;
749cf76c 1170 if (copy_from_user(&init, argp, sizeof(init)))
9b062471 1171 break;
749cf76c 1172
9b062471
CD
1173 r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
1174 break;
749cf76c
CD
1175 }
1176 case KVM_SET_ONE_REG:
1177 case KVM_GET_ONE_REG: {
1178 struct kvm_one_reg reg;
e8180dca 1179
9b062471 1180 r = -ENOEXEC;
e8180dca 1181 if (unlikely(!kvm_vcpu_initialized(vcpu)))
9b062471 1182 break;
e8180dca 1183
9b062471 1184 r = -EFAULT;
749cf76c 1185 if (copy_from_user(&reg, argp, sizeof(reg)))
9b062471
CD
1186 break;
1187
749cf76c 1188 if (ioctl == KVM_SET_ONE_REG)
9b062471 1189 r = kvm_arm_set_reg(vcpu, &reg);
749cf76c 1190 else
9b062471
CD
1191 r = kvm_arm_get_reg(vcpu, &reg);
1192 break;
749cf76c
CD
1193 }
1194 case KVM_GET_REG_LIST: {
1195 struct kvm_reg_list __user *user_list = argp;
1196 struct kvm_reg_list reg_list;
1197 unsigned n;
1198
9b062471 1199 r = -ENOEXEC;
e8180dca 1200 if (unlikely(!kvm_vcpu_initialized(vcpu)))
9b062471 1201 break;
e8180dca 1202
7dd32a0d
DM
1203 r = -EPERM;
1204 if (!kvm_arm_vcpu_is_finalized(vcpu))
1205 break;
1206
9b062471 1207 r = -EFAULT;
749cf76c 1208 if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
9b062471 1209 break;
749cf76c
CD
1210 n = reg_list.n;
1211 reg_list.n = kvm_arm_num_regs(vcpu);
1212 if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
9b062471
CD
1213 break;
1214 r = -E2BIG;
749cf76c 1215 if (n < reg_list.n)
9b062471
CD
1216 break;
1217 r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
1218 break;
749cf76c 1219 }
f577f6c2 1220 case KVM_SET_DEVICE_ATTR: {
9b062471 1221 r = -EFAULT;
f577f6c2 1222 if (copy_from_user(&attr, argp, sizeof(attr)))
9b062471
CD
1223 break;
1224 r = kvm_arm_vcpu_set_attr(vcpu, &attr);
1225 break;
f577f6c2
SZ
1226 }
1227 case KVM_GET_DEVICE_ATTR: {
9b062471 1228 r = -EFAULT;
f577f6c2 1229 if (copy_from_user(&attr, argp, sizeof(attr)))
9b062471
CD
1230 break;
1231 r = kvm_arm_vcpu_get_attr(vcpu, &attr);
1232 break;
f577f6c2
SZ
1233 }
1234 case KVM_HAS_DEVICE_ATTR: {
9b062471 1235 r = -EFAULT;
f577f6c2 1236 if (copy_from_user(&attr, argp, sizeof(attr)))
9b062471
CD
1237 break;
1238 r = kvm_arm_vcpu_has_attr(vcpu, &attr);
1239 break;
f577f6c2 1240 }
b7b27fac
DG
1241 case KVM_GET_VCPU_EVENTS: {
1242 struct kvm_vcpu_events events;
1243
1244 if (kvm_arm_vcpu_get_events(vcpu, &events))
1245 return -EINVAL;
1246
1247 if (copy_to_user(argp, &events, sizeof(events)))
1248 return -EFAULT;
1249
1250 return 0;
1251 }
1252 case KVM_SET_VCPU_EVENTS: {
1253 struct kvm_vcpu_events events;
1254
1255 if (copy_from_user(&events, argp, sizeof(events)))
1256 return -EFAULT;
1257
1258 return kvm_arm_vcpu_set_events(vcpu, &events);
1259 }
7dd32a0d
DM
1260 case KVM_ARM_VCPU_FINALIZE: {
1261 int what;
1262
1263 if (!kvm_vcpu_initialized(vcpu))
1264 return -ENOEXEC;
1265
1266 if (get_user(what, (const int __user *)argp))
1267 return -EFAULT;
1268
1269 return kvm_arm_vcpu_finalize(vcpu, what);
1270 }
749cf76c 1271 default:
9b062471 1272 r = -EINVAL;
749cf76c 1273 }
9b062471 1274
9b062471 1275 return r;
749cf76c
CD
1276}
1277
0dff0846 1278void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
749cf76c 1279{
53c810c3 1280
749cf76c
CD
1281}
1282
0dff0846
SC
1283void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
1284 struct kvm_memory_slot *memslot)
2a31b9db 1285{
0dff0846 1286 kvm_flush_remote_tlbs(kvm);
749cf76c
CD
1287}
1288
3401d546
CD
1289static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1290 struct kvm_arm_device_addr *dev_addr)
1291{
330690cd
CD
1292 unsigned long dev_id, type;
1293
1294 dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
1295 KVM_ARM_DEVICE_ID_SHIFT;
1296 type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
1297 KVM_ARM_DEVICE_TYPE_SHIFT;
1298
1299 switch (dev_id) {
1300 case KVM_ARM_DEVICE_VGIC_V2:
c7da6fa4
PF
1301 if (!vgic_present)
1302 return -ENXIO;
ce01e4e8 1303 return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
330690cd
CD
1304 default:
1305 return -ENODEV;
1306 }
3401d546
CD
1307}
1308
749cf76c
CD
1309long kvm_arch_vm_ioctl(struct file *filp,
1310 unsigned int ioctl, unsigned long arg)
1311{
3401d546
CD
1312 struct kvm *kvm = filp->private_data;
1313 void __user *argp = (void __user *)arg;
1314
1315 switch (ioctl) {
5863c2ce 1316 case KVM_CREATE_IRQCHIP: {
a28ebea2 1317 int ret;
c7da6fa4
PF
1318 if (!vgic_present)
1319 return -ENXIO;
a28ebea2
CD
1320 mutex_lock(&kvm->lock);
1321 ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
1322 mutex_unlock(&kvm->lock);
1323 return ret;
5863c2ce 1324 }
3401d546
CD
1325 case KVM_ARM_SET_DEVICE_ADDR: {
1326 struct kvm_arm_device_addr dev_addr;
1327
1328 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
1329 return -EFAULT;
1330 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
1331 }
42c4e0c7
AP
1332 case KVM_ARM_PREFERRED_TARGET: {
1333 int err;
1334 struct kvm_vcpu_init init;
1335
1336 err = kvm_vcpu_preferred_target(&init);
1337 if (err)
1338 return err;
1339
1340 if (copy_to_user(argp, &init, sizeof(init)))
1341 return -EFAULT;
1342
1343 return 0;
1344 }
3401d546
CD
1345 default:
1346 return -EINVAL;
1347 }
749cf76c
CD
1348}
1349
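/*
 * Size (and allocation order) of the per-cpu region carried by the nVHE
 * hyp code, as delimited by its linker symbols.
 */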
30c95391
DB
1350static unsigned long nvhe_percpu_size(void)
1351{
1352 return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
1353 (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start);
1354}
1355
1356static unsigned long nvhe_percpu_order(void)
1357{
1358 unsigned long size = nvhe_percpu_size();
1359
1360 return size ? get_order(size) : 0;
1361}
1362
b881cdce
WD
1363/* A lookup table holding the hypervisor VA for each vector slot */
1364static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
1365
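/*
 * Convert a vector slot to its 2K-aligned index within the vectors it is
 * part of; the two "direct" slots sit at offset 0 of their respective base.
 */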
4f6a36fe 1366static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
9ef2b48b 1367{
4f6a36fe
WD
1368 return slot - (slot != HYP_VECTOR_DIRECT);
1369}
9ef2b48b 1370
b881cdce 1371static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
9ef2b48b 1372{
4f6a36fe 1373 int idx = __kvm_vector_slot2idx(slot);
9ef2b48b 1374
4f6a36fe 1375 hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
b881cdce
WD
1376}
1377
1378static int kvm_init_vector_slots(void)
1379{
1380 int err;
1381 void *base;
1382
1383 base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
1384 kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);
1385
1386 base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
1387 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
de5bcdb4 1388
c4792b6d 1389 if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
de5bcdb4 1390 return 0;
9ef2b48b 1391
b881cdce
WD
1392 if (!has_vhe()) {
1393 err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
1394 __BP_HARDEN_HYP_VECS_SZ, &base);
1395 if (err)
1396 return err;
9ef2b48b
WD
1397 }
1398
b881cdce
WD
1399 kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
1400 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
9ef2b48b
WD
1401 return 0;
1402}
1403
7e0befd5 1404static void cpu_init_hyp_mode(void)
342cd0ab 1405{
63fec243 1406 struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
04e4caa8 1407 struct arm_smccc_res res;
d3e1086c 1408 unsigned long tcr;
342cd0ab
CD
1409
1410 /* Switch from the HYP stub to our own HYP init vector */
5a677ce0 1411 __hyp_set_vectors(kvm_get_idmap_vector());
342cd0ab 1412
71b3ec5f
DB
1413 /*
1414 * Calculate the raw per-cpu offset without a translation from the
1415 * kernel's mapping to the linear mapping, and store it in tpidr_el2
1416 * so that we can use adr_l to access per-cpu variables in EL2.
e1663372 1417 * Also drop the KASAN tag which gets in the way...
71b3ec5f 1418 */
e1663372 1419 params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) -
63fec243 1420 (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
71b3ec5f 1421
d3e1086c
DB
1422 params->mair_el2 = read_sysreg(mair_el1);
1423
1424 /*
1425 * The ID map may be configured to use an extended virtual address
1426 * range. This is only the case if system RAM is out of range for the
1427 * currently configured page size and VA_BITS, in which case we will
1428 * also need the extended virtual range for the HYP ID map, or we won't
1429 * be able to enable the EL2 MMU.
1430 *
1431 * However, at EL2, there is only one TTBR register, and we can't switch
1432 * between translation tables *and* update TCR_EL2.T0SZ at the same
1433 * time. Bottom line: we need to use the extended range with *both* our
1434 * translation tables.
1435 *
1436 * So use the same T0SZ value we use for the ID map.
1437 */
1438 tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
1439 tcr &= ~TCR_T0SZ_MASK;
1440 tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
1441 params->tcr_el2 = tcr;
1442
63fec243
DB
1443 params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
1444 params->pgd_pa = kvm_mmu_get_httbr();
71b3ec5f 1445
63fec243
DB
1446 /*
1447 * Flush the init params from the data cache because the struct will
1448 * be read while the MMU is off.
1449 */
1450 kvm_flush_dcache_to_poc(params, sizeof(*params));
342cd0ab 1451
71b3ec5f
DB
1452 /*
1453 * Call initialization code, and switch to the full blown HYP code.
1454 * If the cpucaps haven't been finalized yet, something has gone very
1455 * wrong, and hyp will crash and burn when it uses any
1456 * cpus_have_const_cap() wrapper.
1457 */
1458 BUG_ON(!system_capabilities_finalized());
63fec243 1459 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
04e4caa8 1460 WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
71b3ec5f
DB
1461
1462 /*
1463 * Disabling SSBD on a non-VHE system requires us to enable SSBS
1464 * at EL2.
1465 */
1466 if (this_cpu_has_cap(ARM64_SSBS) &&
d63d975a 1467 arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) {
13aeb9b4 1468 kvm_call_hyp_nvhe(__kvm_enable_ssbs);
71b3ec5f 1469 }
342cd0ab
CD
1470}
1471
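/*
 * Return EL2 to the hyp stub vectors; a no-op on VHE, where the kernel
 * itself runs at EL2.
 */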
47eb3cba
MZ
1472static void cpu_hyp_reset(void)
1473{
1474 if (!is_kernel_in_hyp_mode())
1475 __hyp_reset_vectors();
1476}
1477
042c76a9
WD
1478/*
1479 * EL2 vectors can be mapped and rerouted in a number of ways,
1480 * depending on the kernel configuration and CPU present:
1481 *
1482 * - If the CPU is affected by Spectre-v2, the hardening sequence is
1483 * placed in one of the vector slots, which is executed before jumping
1484 * to the real vectors.
1485 *
c4792b6d 1486 * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
042c76a9
WD
1487 * containing the hardening sequence is mapped next to the idmap page,
1488 * and executed before jumping to the real vectors.
1489 *
c4792b6d 1490 * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
042c76a9
WD
1491 * empty slot is selected, mapped next to the idmap page, and
1492 * executed before jumping to the real vectors.
1493 *
c4792b6d 1494 * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
042c76a9
WD
1495 * VHE, as we don't have hypervisor-specific mappings. If the system
1496 * is VHE and yet selects this capability, it will be ignored.
1497 */
1498static void cpu_set_hyp_vector(void)
1499{
6279017e 1500 struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
b881cdce 1501 void *vector = hyp_spectre_vector_selector[data->slot];
042c76a9 1502
b881cdce 1503 *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
042c76a9
WD
1504}
1505
5f5560b1
JM
1506static void cpu_hyp_reinit(void)
1507{
2a1198c9 1508 kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
1e0cf16c 1509
47eb3cba 1510 cpu_hyp_reset();
042c76a9 1511 cpu_set_hyp_vector();
a0e47952 1512
9d47bb0d 1513 if (is_kernel_in_hyp_mode())
02d50cda 1514 kvm_timer_init_vhe();
9d47bb0d 1515 else
7e0befd5 1516 cpu_init_hyp_mode();
5b0d2cc2 1517
da5a3ce6 1518 kvm_arm_init_debug();
5b0d2cc2
CD
1519
1520 if (vgic_present)
1521 kvm_vgic_init_cpu_hardware();
5f5560b1
JM
1522}
1523
67f69197
AT
1524static void _kvm_arch_hardware_enable(void *discard)
1525{
1526 if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
5f5560b1 1527 cpu_hyp_reinit();
67f69197 1528 __this_cpu_write(kvm_arm_hardware_enabled, 1);
d157f4a5 1529 }
67f69197 1530}
d157f4a5 1531
67f69197
AT
1532int kvm_arch_hardware_enable(void)
1533{
1534 _kvm_arch_hardware_enable(NULL);
1535 return 0;
342cd0ab
CD
1536}
1537
67f69197
AT
1538static void _kvm_arch_hardware_disable(void *discard)
1539{
1540 if (__this_cpu_read(kvm_arm_hardware_enabled)) {
1541 cpu_hyp_reset();
1542 __this_cpu_write(kvm_arm_hardware_enabled, 0);
1543 }
1544}
1545
1546void kvm_arch_hardware_disable(void)
1547{
fa8c3d65
DB
1548 if (!is_protected_kvm_enabled())
1549 _kvm_arch_hardware_disable(NULL);
67f69197 1550}
d157f4a5 1551
1fcf7ce0
LP
1552#ifdef CONFIG_CPU_PM
1553static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1554 unsigned long cmd,
1555 void *v)
1556{
67f69197
AT
1557 /*
1558 * kvm_arm_hardware_enabled is left with its old value over
1559 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
1560 * re-enable hyp.
1561 */
1562 switch (cmd) {
1563 case CPU_PM_ENTER:
1564 if (__this_cpu_read(kvm_arm_hardware_enabled))
1565 /*
1566 * don't update kvm_arm_hardware_enabled here
1567 * so that the hardware will be re-enabled
1568 * when we resume. See below.
1569 */
1570 cpu_hyp_reset();
1571
1fcf7ce0 1572 return NOTIFY_OK;
58d6b15e 1573 case CPU_PM_ENTER_FAILED:
67f69197
AT
1574 case CPU_PM_EXIT:
1575 if (__this_cpu_read(kvm_arm_hardware_enabled))
1576 /* The hardware was enabled before suspend. */
1577 cpu_hyp_reinit();
1fcf7ce0 1578
67f69197
AT
1579 return NOTIFY_OK;
1580
1581 default:
1582 return NOTIFY_DONE;
1583 }
1fcf7ce0
LP
1584}
1585
1586static struct notifier_block hyp_init_cpu_pm_nb = {
1587 .notifier_call = hyp_init_cpu_pm_notifier,
1588};
1589
44362a3c 1590static void hyp_cpu_pm_init(void)
1fcf7ce0 1591{
fa8c3d65
DB
1592 if (!is_protected_kvm_enabled())
1593 cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
1fcf7ce0 1594}
44362a3c 1595static void hyp_cpu_pm_exit(void)
06a71a24 1596{
fa8c3d65
DB
1597 if (!is_protected_kvm_enabled())
1598 cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
06a71a24 1599}
1fcf7ce0
LP
1600#else
1601static inline void hyp_cpu_pm_init(void)
1602{
1603}
06a71a24
SH
1604static inline void hyp_cpu_pm_exit(void)
1605{
1606}
1fcf7ce0
LP
1607#endif
1608
94f5e8a4
DB
1609static void init_cpu_logical_map(void)
1610{
1611 unsigned int cpu;
1612
1613 /*
1614 * Copy the MPIDR <-> logical CPU ID mapping to hyp.
1615 * Only copy the set of online CPUs whose features have been checked
1616 * against the finalized system capabilities. The hypervisor will not
1617 * allow any other CPUs from the `possible` set to boot.
1618 */
1619 for_each_online_cpu(cpu)
61fe0c37 1620 hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
94f5e8a4
DB
1621}
1622
767c973f
MZ
1623#define init_psci_0_1_impl_state(config, what) \
1624 config.psci_0_1_ ## what ## _implemented = psci_ops.what
1625
eeeee719
DB
1626static bool init_psci_relay(void)
1627{
1628 /*
1629 * If PSCI has not been initialized, protected KVM cannot install
1630 * itself on newly booted CPUs.
1631 */
1632 if (!psci_ops.get_version) {
1633 kvm_err("Cannot initialize protected mode without PSCI\n");
1634 return false;
1635 }
1636
ff367fe4
DB
1637 kvm_host_psci_config.version = psci_ops.get_version();
1638
1639 if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
1640 kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
767c973f
MZ
1641 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend);
1642 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on);
1643 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off);
1644 init_psci_0_1_impl_state(kvm_host_psci_config, migrate);
ff367fe4 1645 }
eeeee719
DB
1646 return true;
1647}
1648
1e947bad
MZ
1649static int init_common_resources(void)
1650{
b130a8f7 1651 return kvm_set_ipa_limit();
1e947bad
MZ
1652}
1653
1654static int init_subsystems(void)
1655{
67f69197 1656 int err = 0;
1e947bad 1657
5f5560b1 1658 /*
67f69197 1659 * Enable hardware so that subsystem initialisation can access EL2.
5f5560b1 1660 */
67f69197 1661 on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
5f5560b1
JM
1662
1663 /*
1664 * Register the CPU low-power notifier
1665 */
1666 hyp_cpu_pm_init();
1667
1e947bad
MZ
1668 /*
1669 * Init HYP view of VGIC
1670 */
1671 err = kvm_vgic_hyp_init();
1672 switch (err) {
1673 case 0:
1674 vgic_present = true;
1675 break;
1676 case -ENODEV:
1677 case -ENXIO:
1678 vgic_present = false;
67f69197 1679 err = 0;
1e947bad
MZ
1680 break;
1681 default:
67f69197 1682 goto out;
1e947bad
MZ
1683 }
1684
1685 /*
1686 * Init HYP architected timer support
1687 */
f384dcfe 1688 err = kvm_timer_hyp_init(vgic_present);
1e947bad 1689 if (err)
67f69197 1690 goto out;
1e947bad
MZ
1691
1692 kvm_perf_init();
6ac4a5ac 1693 kvm_sys_reg_table_init();
1e947bad 1694
67f69197 1695out:
fa8c3d65
DB
1696 if (err || !is_protected_kvm_enabled())
1697 on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
67f69197
AT
1698
1699 return err;
1e947bad
MZ
1700}
1701
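/*
 * Undo init_hyp_mode(): tear down the hyp page tables and free the
 * per-cpu stacks and nVHE per-cpu regions.
 */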
1702static void teardown_hyp_mode(void)
1703{
1704 int cpu;
1705
1e947bad 1706 free_hyp_pgds();
30c95391 1707 for_each_possible_cpu(cpu) {
1e947bad 1708 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
30c95391
DB
1709 free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
1710 }
1e947bad
MZ
1711}
1712
342cd0ab
CD
1713/**
1714 * Inits Hyp-mode on all online CPUs
1715 */
1716static int init_hyp_mode(void)
1717{
342cd0ab
CD
1718 int cpu;
1719 int err = 0;
1720
1721 /*
1722 * Allocate Hyp PGD and setup Hyp identity mapping
1723 */
1724 err = kvm_mmu_init();
1725 if (err)
1726 goto out_err;
1727
342cd0ab
CD
1728 /*
1729 * Allocate stack pages for Hypervisor-mode
1730 */
1731 for_each_possible_cpu(cpu) {
1732 unsigned long stack_page;
1733
1734 stack_page = __get_free_page(GFP_KERNEL);
1735 if (!stack_page) {
1736 err = -ENOMEM;
1e947bad 1737 goto out_err;
342cd0ab
CD
1738 }
1739
1740 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1741 }
1742
30c95391
DB
1743 /*
1744 * Allocate and initialize pages for Hypervisor-mode percpu regions.
1745 */
1746 for_each_possible_cpu(cpu) {
1747 struct page *page;
1748 void *page_addr;
1749
1750 page = alloc_pages(GFP_KERNEL, nvhe_percpu_order());
1751 if (!page) {
1752 err = -ENOMEM;
1753 goto out_err;
1754 }
1755
1756 page_addr = page_address(page);
1757 memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
1758 kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
1759 }
1760
342cd0ab
CD
1761 /*
1762 * Map the Hyp-code called directly from the host
1763 */
588ab3f9 1764 err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
59002705 1765 kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
342cd0ab
CD
1766 if (err) {
1767 kvm_err("Cannot map world-switch code\n");
1e947bad 1768 goto out_err;
342cd0ab
CD
1769 }
1770
2d7bf218
DB
1771 err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
1772 kvm_ksym_ref(__hyp_data_ro_after_init_end),
1773 PAGE_HYP_RO);
1774 if (err) {
1775 kvm_err("Cannot map .hyp.data..ro_after_init section\n");
1776 goto out_err;
1777 }
1778
a0bf9776 1779 err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
74a6b888 1780 kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
910917bb
MZ
1781 if (err) {
1782 kvm_err("Cannot map rodata section\n");
c8ea0395
MZ
1783 goto out_err;
1784 }
1785
1786 err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
1787 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
1788 if (err) {
1789 kvm_err("Cannot map bss section\n");
1e947bad 1790 goto out_err;
910917bb
MZ
1791 }
1792
342cd0ab
CD
1793 /*
1794 * Map the Hyp stack pages
1795 */
1796 for_each_possible_cpu(cpu) {
1797 char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
c8dddecd
MZ
1798 err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
1799 PAGE_HYP);
342cd0ab
CD
1800
1801 if (err) {
1802 kvm_err("Cannot map hyp stack\n");
1e947bad 1803 goto out_err;
342cd0ab
CD
1804 }
1805 }
1806
30c95391
DB
1807 /*
1808 * Map Hyp percpu pages
1809 */
342cd0ab 1810 for_each_possible_cpu(cpu) {
30c95391
DB
1811 char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
1812 char *percpu_end = percpu_begin + nvhe_percpu_size();
342cd0ab 1813
30c95391 1814 err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
342cd0ab
CD
1815
1816 if (err) {
30c95391 1817 kvm_err("Cannot map hyp percpu region\n");
6e3bfbb2
AS
1818 goto out_err;
1819 }
342cd0ab
CD
1820 }
1821
eeeee719 1822 if (is_protected_kvm_enabled()) {
94f5e8a4
DB
1823 init_cpu_logical_map();
1824
fcb7b771
WW
1825 if (!init_psci_relay()) {
1826 err = -ENODEV;
eeeee719 1827 goto out_err;
fcb7b771 1828 }
eeeee719
DB
1829 }
1830
342cd0ab 1831 return 0;
1e947bad 1832
342cd0ab 1833out_err:
1e947bad 1834 teardown_hyp_mode();
342cd0ab
CD
1835 kvm_err("error initializing Hyp mode: %d\n", err);
1836 return err;
1837}
1838
d4e071ce
AP
1839static void check_kvm_target_cpu(void *ret)
1840{
1841 *(int *)ret = kvm_target_cpu();
1842}
1843
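/* Return the vcpu whose MPIDR affinity fields match @mpidr, or NULL. */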
4429fc64
AP
1844struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
1845{
1846 struct kvm_vcpu *vcpu;
1847 int i;
1848
1849 mpidr &= MPIDR_HWID_BITMASK;
1850 kvm_for_each_vcpu(i, vcpu, kvm) {
1851 if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
1852 return vcpu;
1853 }
1854 return NULL;
1855}
1856
2412405b
EA
1857bool kvm_arch_has_irq_bypass(void)
1858{
1859 return true;
1860}
1861
1862int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
1863 struct irq_bypass_producer *prod)
1864{
1865 struct kvm_kernel_irqfd *irqfd =
1866 container_of(cons, struct kvm_kernel_irqfd, consumer);
1867
196b1364
MZ
1868 return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
1869 &irqfd->irq_entry);
2412405b
EA
1870}
1871void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
1872 struct irq_bypass_producer *prod)
1873{
1874 struct kvm_kernel_irqfd *irqfd =
1875 container_of(cons, struct kvm_kernel_irqfd, consumer);
1876
196b1364
MZ
1877 kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
1878 &irqfd->irq_entry);
2412405b
EA
1879}
1880
1881void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
1882{
1883 struct kvm_kernel_irqfd *irqfd =
1884 container_of(cons, struct kvm_kernel_irqfd, consumer);
1885
1886 kvm_arm_halt_guest(irqfd->kvm);
1887}
1888
1889void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
1890{
1891 struct kvm_kernel_irqfd *irqfd =
1892 container_of(cons, struct kvm_kernel_irqfd, consumer);
1893
1894 kvm_arm_resume_guest(irqfd->kvm);
1895}
1896
342cd0ab
CD
1897/**
1898 * Initialize Hyp-mode and memory mappings on all CPUs.
1899 */
749cf76c
CD
1900int kvm_arch_init(void *opaque)
1901{
342cd0ab 1902 int err;
d4e071ce 1903 int ret, cpu;
fe7d7b03 1904 bool in_hyp_mode;
342cd0ab
CD
1905
1906 if (!is_hyp_mode_available()) {
58d0d19a 1907 kvm_info("HYP mode not available\n");
342cd0ab
CD
1908 return -ENODEV;
1909 }
1910
33e5f4e5
MZ
1911 in_hyp_mode = is_kernel_in_hyp_mode();
1912
1913 if (!in_hyp_mode && kvm_arch_requires_vhe()) {
1914 kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
85acda3b
DM
1915 return -ENODEV;
1916 }
1917
96d389ca
RH
1918 if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
1919 cpus_have_final_cap(ARM64_WORKAROUND_1508412))
abf532cc
RH
1920 kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
1921 "Only trusted guests should be used on this system.\n");
1922
d4e071ce
AP
1923 for_each_online_cpu(cpu) {
1924 smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
1925 if (ret < 0) {
1926 kvm_err("Error, CPU %d not supported!\n", cpu);
1927 return -ENODEV;
1928 }
342cd0ab
CD
1929 }
1930
1e947bad 1931 err = init_common_resources();
342cd0ab 1932 if (err)
1e947bad 1933 return err;
342cd0ab 1934
a3be836d 1935 err = kvm_arm_init_sve();
0f062bfe
DM
1936 if (err)
1937 return err;
1938
fe7d7b03 1939 if (!in_hyp_mode) {
1e947bad 1940 err = init_hyp_mode();
fe7d7b03
JT
1941 if (err)
1942 goto out_err;
1943 }
8146875d 1944
b881cdce
WD
1945 err = kvm_init_vector_slots();
1946 if (err) {
1947 kvm_err("Cannot initialise vector slots\n");
1948 goto out_err;
1949 }
1950
1e947bad
MZ
1951 err = init_subsystems();
1952 if (err)
1953 goto out_hyp;
1fcf7ce0 1954
f19f6644
DB
1955 if (is_protected_kvm_enabled()) {
1956 static_branch_enable(&kvm_protected_mode_initialized);
3eb681fb 1957 kvm_info("Protected nVHE mode initialized successfully\n");
f19f6644 1958 } else if (in_hyp_mode) {
fe7d7b03 1959 kvm_info("VHE mode initialized successfully\n");
f19f6644 1960 } else {
fe7d7b03 1961 kvm_info("Hyp mode initialized successfully\n");
f19f6644 1962 }
fe7d7b03 1963
749cf76c 1964 return 0;
1e947bad
MZ
1965
1966out_hyp:
c3e35409 1967 hyp_cpu_pm_exit();
fe7d7b03
JT
1968 if (!in_hyp_mode)
1969 teardown_hyp_mode();
342cd0ab
CD
1970out_err:
1971 return err;
749cf76c
CD
1972}
1973
1974/* NOP: Compiling as a module not supported */
1975void kvm_arch_exit(void)
1976{
210552c1 1977 kvm_perf_teardown();
749cf76c
CD
1978}
1979
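/*
 * Parse the "kvm-arm.mode" early parameter; "protected" selects protected
 * nVHE mode, anything else is rejected.
 */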
d8b369c4
DB
1980static int __init early_kvm_mode_cfg(char *arg)
1981{
1982 if (!arg)
1983 return -EINVAL;
1984
1985 if (strcmp(arg, "protected") == 0) {
1986 kvm_mode = KVM_MODE_PROTECTED;
1987 return 0;
1988 }
1989
1990 return -EINVAL;
1991}
1992early_param("kvm-arm.mode", early_kvm_mode_cfg);
1993
3eb681fb
DB
1994enum kvm_mode kvm_get_mode(void)
1995{
1996 return kvm_mode;
1997}
1998
749cf76c
CD
1999static int arm_init(void)
2000{
2001 int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2002 return rc;
2003}
2004
2005module_init(arm_init);