Hi,
This is version 2 of a backport for v5.4.
Changes since v2: 1. Send proper backport without context changes, 2. This series is only for v5.4.
Changes since v1: 1. Clean backport, without context changes.
The first patch specifically fixes bug aftert 2nd resume: BUG: Bad page state in process dbus-daemon pfn:18b01 page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591 flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim) raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
Best regards, Krzysztof
Vitaly Kuznetsov (3): x86/kvm: Teardown PV features on boot CPU as well x86/kvm: Disable kvmclock on all CPUs on shutdown x86/kvm: Disable all PV features on crash
arch/x86/include/asm/kvm_para.h | 10 +--- arch/x86/kernel/kvm.c | 92 ++++++++++++++++++++++++--------- arch/x86/kernel/kvmclock.c | 26 +--------- 3 files changed, 72 insertions(+), 56 deletions(-)
From: Vitaly Kuznetsov vkuznets@redhat.com
commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
Various PV features (Async PF, PV EOI, steal time) work through memory shared with hypervisor and when we restore from hibernation we must properly teardown all these features to make sure hypervisor doesn't write to stale locations after we jump to the previously hibernated kernel (which can try to place anything there). For secondary CPUs the job is already done by kvm_cpu_down_prepare(), register syscore ops to do the same for boot CPU.
Krzysztof: This fixes memory corruption visible after second resume from hibernation:
BUG: Bad page state in process dbus-daemon pfn:18b01 page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591 flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim) raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-3-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Andrea Righi andrea.righi@canonical.com [krzysztof: Extend the commit message] Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com --- arch/x86/kernel/kvm.c | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e820568ed4d5..82a2756e0ef8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -24,6 +24,7 @@ #include <linux/debugfs.h> #include <linux/nmi.h> #include <linux/swait.h> +#include <linux/syscore_ops.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -428,6 +429,25 @@ static void __init sev_map_percpu_data(void) } }
+static void kvm_guest_cpu_offline(void) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + apf_task_wake_all(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@@ -547,31 +567,34 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); }
-static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); -} + unsigned long flags;
-static int kvm_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + local_irq_save(flags); + kvm_guest_cpu_offline(); + local_irq_restore(flags); return 0; }
-static int kvm_cpu_down_prepare(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); kvm_guest_cpu_offline(); - local_irq_enable(); + return 0; } -#endif + +static void kvm_resume(void) +{ + kvm_cpu_online(raw_smp_processor_id()); +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +};
static void __init kvm_apf_trap_init(void) { @@ -649,6 +672,8 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif
+ register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is
On 31/05/21 16:03, Krzysztof Kozlowski wrote:
From: Vitaly Kuznetsov vkuznets@redhat.com
commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
Various PV features (Async PF, PV EOI, steal time) work through memory shared with hypervisor and when we restore from hibernation we must properly teardown all these features to make sure hypervisor doesn't write to stale locations after we jump to the previously hibernated kernel (which can try to place anything there). For secondary CPUs the job is already done by kvm_cpu_down_prepare(), register syscore ops to do the same for boot CPU.
Krzysztof: This fixes memory corruption visible after second resume from hibernation:
BUG: Bad page state in process dbus-daemon pfn:18b01 page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591 flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim) raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-3-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Andrea Righi andrea.righi@canonical.com [krzysztof: Extend the commit message] Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com
arch/x86/kernel/kvm.c | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e820568ed4d5..82a2756e0ef8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -24,6 +24,7 @@ #include <linux/debugfs.h> #include <linux/nmi.h> #include <linux/swait.h> +#include <linux/syscore_ops.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -428,6 +429,25 @@ static void __init sev_map_percpu_data(void) } } +static void kvm_guest_cpu_offline(void) +{
- kvm_disable_steal_time();
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- apf_task_wake_all();
+}
+static int kvm_cpu_online(unsigned int cpu) +{
- unsigned long flags;
- local_irq_save(flags);
- kvm_guest_cpu_init();
- local_irq_restore(flags);
- return 0;
+}
- #ifdef CONFIG_SMP #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@@ -547,31 +567,34 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) {
- kvm_disable_steal_time();
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- apf_task_wake_all();
-}
- unsigned long flags;
-static int kvm_cpu_online(unsigned int cpu) -{
- local_irq_disable();
- kvm_guest_cpu_init();
- local_irq_enable();
- local_irq_save(flags);
- kvm_guest_cpu_offline();
- local_irq_restore(flags); return 0; }
-static int kvm_cpu_down_prepare(unsigned int cpu) +#endif
+static int kvm_suspend(void) {
- local_irq_disable(); kvm_guest_cpu_offline();
- local_irq_enable();
- return 0; }
-#endif
+static void kvm_resume(void) +{
- kvm_cpu_online(raw_smp_processor_id());
+}
+static struct syscore_ops kvm_syscore_ops = {
- .suspend = kvm_suspend,
- .resume = kvm_resume,
+}; static void __init kvm_apf_trap_init(void) { @@ -649,6 +672,8 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif
- register_syscore_ops(&kvm_syscore_ops);
- /*
- Hard lockup detection is enabled by default. Disable it, as guests
- can get false positives too easily, for example if the host is
Acked-by: Paolo Bonzini pbonzini@redhat.com
From: Vitaly Kuznetsov vkuznets@redhat.com
commit c02027b5742b5aa804ef08a4a9db433295533046 upstream.
Currenly, we disable kvmclock from machine_shutdown() hook and this only happens for boot CPU. We need to disable it for all CPUs to guard against memory corruption e.g. on restore from hibernate.
Note, writing '0' to kvmclock MSR doesn't clear memory location, it just prevents hypervisor from updating the location so for the short while after write and while CPU is still alive, the clock remains usable and correct so we don't need to switch to some other clocksource.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-4-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Andrea Righi andrea.righi@canonical.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/kvmclock.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a..a617fd360023 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -6,8 +6,6 @@ #include <asm/alternative.h> #include <uapi/asm/kvm_para.h>
-extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -85,6 +83,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, }
#ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 82a2756e0ef8..d6f04d32dec0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -436,6 +436,7 @@ static void kvm_guest_cpu_offline(void) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); apf_task_wake_all(); + kvmclock_disable(); }
static int kvm_cpu_online(unsigned int cpu) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 904494b924c1..bd3962953f78 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -214,11 +214,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs) } #endif
-static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); }
static void __init kvmclock_init_mem(void) @@ -346,7 +344,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif
On 31/05/21 16:03, Krzysztof Kozlowski wrote:
From: Vitaly Kuznetsov vkuznets@redhat.com
commit c02027b5742b5aa804ef08a4a9db433295533046 upstream.
Currenly, we disable kvmclock from machine_shutdown() hook and this only happens for boot CPU. We need to disable it for all CPUs to guard against memory corruption e.g. on restore from hibernate.
Note, writing '0' to kvmclock MSR doesn't clear memory location, it just prevents hypervisor from updating the location so for the short while after write and while CPU is still alive, the clock remains usable and correct so we don't need to switch to some other clocksource.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-4-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Andrea Righi andrea.righi@canonical.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com
arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/kvmclock.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a..a617fd360023 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -6,8 +6,6 @@ #include <asm/alternative.h> #include <uapi/asm/kvm_para.h> -extern void kvmclock_init(void);
- #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else
@@ -85,6 +83,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 82a2756e0ef8..d6f04d32dec0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -436,6 +436,7 @@ static void kvm_guest_cpu_offline(void) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); apf_task_wake_all();
- kvmclock_disable(); }
static int kvm_cpu_online(unsigned int cpu) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 904494b924c1..bd3962953f78 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -214,11 +214,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs) } #endif -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_shutdown(); }
static void __init kvmclock_init_mem(void) @@ -346,7 +344,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
- machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif
Acked-by: Paolo Bonzini pbonzini@redhat.com
From: Vitaly Kuznetsov vkuznets@redhat.com
commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream.
Crash shutdown handler only disables kvmclock and steal time, other PV features remain active so we risk corrupting memory or getting some side-effects in kdump kernel. Move crash handler to kvm.c and unify with CPU offline.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-5-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com --- arch/x86/include/asm/kvm_para.h | 6 ----- arch/x86/kernel/kvm.c | 44 ++++++++++++++++++++++++--------- arch/x86/kernel/kvmclock.c | 21 ---------------- 3 files changed, 32 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index a617fd360023..f913f62eb6c3 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -91,7 +91,6 @@ unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
#ifdef CONFIG_PARAVIRT_SPINLOCKS @@ -125,11 +124,6 @@ static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; } - -static inline void kvm_disable_steal_time(void) -{ - return; -} #endif
#endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d6f04d32dec0..6ff2c7cac4c4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -34,6 +34,7 @@ #include <asm/apicdef.h> #include <asm/hypervisor.h> #include <asm/tlb.h> +#include <asm/reboot.h>
static int kvmapf = 1;
@@ -352,6 +353,14 @@ static void kvm_pv_disable_apf(void) smp_processor_id()); }
+static void kvm_disable_steal_time(void) +{ + if (!has_steal_clock) + return; + + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); +} + static void kvm_pv_guest_cpu_reboot(void *unused) { /* @@ -394,14 +403,6 @@ static u64 kvm_steal_clock(int cpu) return steal; }
-void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -429,13 +430,14 @@ static void __init sev_map_percpu_data(void) } }
-static void kvm_guest_cpu_offline(void) +static void kvm_guest_cpu_offline(bool shutdown) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); - apf_task_wake_all(); + if (!shutdown) + apf_task_wake_all(); kvmclock_disable(); }
@@ -573,7 +575,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) unsigned long flags;
local_irq_save(flags); - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false); local_irq_restore(flags); return 0; } @@ -582,7 +584,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
static int kvm_suspend(void) { - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false);
return 0; } @@ -597,6 +599,20 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, };
+/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. + */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} +#endif + static void __init kvm_apf_trap_init(void) { update_intr_gate(X86_TRAP_PF, async_page_fault); @@ -673,6 +689,10 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif
+#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + register_syscore_ops(&kvm_syscore_ops);
/* diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bd3962953f78..4a0802af2e3e 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> #include <asm/x86_init.h> -#include <asm/reboot.h> #include <asm/kvmclock.h>
static int kvmclock __initdata = 1; @@ -197,23 +196,6 @@ static void kvm_setup_secondary_clock(void) } #endif
-/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); @@ -344,9 +326,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj();
/*
On 31/05/21 16:03, Krzysztof Kozlowski wrote:
From: Vitaly Kuznetsov vkuznets@redhat.com
commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream.
Crash shutdown handler only disables kvmclock and steal time, other PV features remain active so we risk corrupting memory or getting some side-effects in kdump kernel. Move crash handler to kvm.c and unify with CPU offline.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Message-Id: 20210414123544.1060604-5-vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com
arch/x86/include/asm/kvm_para.h | 6 ----- arch/x86/kernel/kvm.c | 44 ++++++++++++++++++++++++--------- arch/x86/kernel/kvmclock.c | 21 ---------------- 3 files changed, 32 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index a617fd360023..f913f62eb6c3 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -91,7 +91,6 @@ unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); #ifdef CONFIG_PARAVIRT_SPINLOCKS @@ -125,11 +124,6 @@ static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; }
-static inline void kvm_disable_steal_time(void) -{
- return;
-} #endif #endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d6f04d32dec0..6ff2c7cac4c4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -34,6 +34,7 @@ #include <asm/apicdef.h> #include <asm/hypervisor.h> #include <asm/tlb.h> +#include <asm/reboot.h> static int kvmapf = 1; @@ -352,6 +353,14 @@ static void kvm_pv_disable_apf(void) smp_processor_id()); } +static void kvm_disable_steal_time(void) +{
- if (!has_steal_clock)
return;
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
- static void kvm_pv_guest_cpu_reboot(void *unused) { /*
@@ -394,14 +403,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{
- if (!has_steal_clock)
return;
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
- static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size);
@@ -429,13 +430,14 @@ static void __init sev_map_percpu_data(void) } } -static void kvm_guest_cpu_offline(void) +static void kvm_guest_cpu_offline(bool shutdown) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf();
- apf_task_wake_all();
- if (!shutdown)
kvmclock_disable(); }apf_task_wake_all();
@@ -573,7 +575,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) unsigned long flags; local_irq_save(flags);
- kvm_guest_cpu_offline();
- kvm_guest_cpu_offline(false); local_irq_restore(flags); return 0; }
@@ -582,7 +584,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static int kvm_suspend(void) {
- kvm_guest_cpu_offline();
- kvm_guest_cpu_offline(false);
return 0; } @@ -597,6 +599,20 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; +/*
- After a PV feature is registered, the host will keep writing to the
- registered memory location. If the guest happens to shutdown, this memory
- won't be valid. In cases like kexec, in which you install a new kernel, this
- means a random memory location will be kept being written.
- */
+#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{
- kvm_guest_cpu_offline(true);
- native_machine_crash_shutdown(regs);
+} +#endif
- static void __init kvm_apf_trap_init(void) { update_intr_gate(X86_TRAP_PF, async_page_fault);
@@ -673,6 +689,10 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE
- machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
- register_syscore_ops(&kvm_syscore_ops);
/* diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bd3962953f78..4a0802af2e3e 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> #include <asm/x86_init.h> -#include <asm/reboot.h> #include <asm/kvmclock.h> static int kvmclock __initdata = 1; @@ -197,23 +196,6 @@ static void kvm_setup_secondary_clock(void) } #endif -/*
- After the clock is registered, the host will keep writing to the
- registered memory location. If the guest happens to shutdown, this memory
- won't be valid. In cases like kexec, in which you install a new kernel, this
- means a random memory location will be kept being written. So before any
- kind of shutdown from our side, we unregister the clock by writing anything
- that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{
- native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_crash_shutdown(regs);
-} -#endif
- void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0);
@@ -344,9 +326,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; -#ifdef CONFIG_KEXEC_CORE
- machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif kvm_get_preset_lpj(); /*
Acked-by: Paolo Bonzini pbonzini@redhat.com
linux-stable-mirror@lists.linaro.org