On Sat, Oct 26, 2024 at 08:43:21AM +0100, Marc Zyngier wrote:
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bf64fed9820e..c315bc1a4e9a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -74,8 +74,6 @@ enum kvm_mode kvm_get_mode(void); static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; #endif -DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int __ro_after_init kvm_sve_max_vl; extern unsigned int __ro_after_init kvm_host_sve_max_vl; int __init kvm_arm_init_sve(void); diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 879982b1cc73..1215df590418 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -206,8 +206,7 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) static inline bool userspace_irqchip(struct kvm *kvm) {
- return static_branch_unlikely(&userspace_irqchip_in_use) &&
unlikely(!irqchip_in_kernel(kvm));
- return unlikely(!irqchip_in_kernel(kvm));
} static void soft_timer_start(struct hrtimer *hrt, u64 ns) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 48cafb65d6ac..70ff9a20ef3a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -69,7 +69,6 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); static bool vgic_present, kvm_arm_initialised; static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized); -DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); bool is_kvm_arm_initialised(void) { @@ -503,9 +502,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) {
- if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
- kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu);
@@ -848,14 +844,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) return ret; }
- if (!irqchip_in_kernel(kvm)) {
/*
* Tell the rest of the code that there are userspace irqchip
* VMs in the wild.
*/
static_branch_inc(&userspace_irqchip_in_use);
- }
- /*
- Initialize traps for protected VMs.
- NOTE: Move to run in EL2 directly, rather than via a hypercall, once
@@ -1077,7 +1065,7 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) * state gets updated in kvm_timer_update_run and * kvm_pmu_update_run below). */
- if (static_branch_unlikely(&userspace_irqchip_in_use)) {
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { if (kvm_timer_should_notify_user(vcpu) || kvm_pmu_should_notify_user(vcpu)) { *ret = -EINTR;
@@ -1199,7 +1187,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(&userspace_irqchip_in_use))
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable();
@@ -1245,7 +1233,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * we don't want vtimer interrupts to race with syncing the * timer virtual interrupt state. */
if (static_branch_unlikely(&userspace_irqchip_in_use))
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_timer_sync_user(vcpu);
kvm_arch_vcpu_ctxsync_fp(vcpu);
I think this would fix the problem you're seeing without changing the userspace view of an erroneous configuration. It would also pave the way for the complete removal of the interrupt notification to userspace, which I claim has no user and is just a shit idea.
Yeah, looks like this ought to get it done.
Even with a fix for this particular issue I do wonder if we should categorically harden against late initialization failures and un-init the vCPU (or bug VM, where necessary) to avoid dealing with half-baked vCPUs/VMs across our UAPI surfaces.
A sane userspace will probably crash when KVM_RUN returns EINVAL anyway.