The patch below does not apply to the options-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the no-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 7ee6eb2fa715..02b0cf2cfafe 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1568,6 +1568,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1600,12 +1601,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
pr_warn("fb%d: no device set\n", i);
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1876,13 +1888,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 2892145468c9..07fcd0e56682 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -511,7 +511,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 528c87ff14d8..619d82e20d4e 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1568,6 +1568,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1602,12 +1603,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
*/
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1945,13 +1957,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3d7306c9a706..02f362c661c8 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -502,7 +502,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 643383d74edc..82880f59ba67 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1566,6 +1566,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1600,12 +1601,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
*/
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1946,13 +1958,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 9a77ab615c36..ae399d3ea4f9 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -503,7 +503,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 764643a6be07445308e492a528197044c801b3ba Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 14 Jun 2022 21:58:28 +0000
Subject: [PATCH] KVM: nVMX: Snapshot pre-VM-Enter DEBUGCTL for
!nested_run_pending case
If a nested run isn't pending, snapshot vmcs01.GUEST_IA32_DEBUGCTL
irrespective of whether or not VM_ENTRY_LOAD_DEBUG_CONTROLS is set in
vmcs12. When restoring nested state, e.g. after migration, without a
nested run pending, prepare_vmcs02() will propagate
nested.vmcs01_debugctl to vmcs02, i.e. will load garbage/zeros into
vmcs02.GUEST_IA32_DEBUGCTL.
If userspace restores nested state before MSRs, then loading garbage is a
non-issue as loading DEBUGCTL will also update vmcs02. But if usersepace
restores MSRs first, then KVM is responsible for propagating L2's value,
which is actually thrown into vmcs01, into vmcs02.
Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
is all kinds of bizarre and ideally would not be supported. Sadly, some
VMMs do exactly that and rely on KVM to make things work.
Note, there's still a lurking SMM bug, as propagating vmcs01's DEBUGCTL
to vmcs02 across RSM may corrupt L2's DEBUGCTL. But KVM's entire VMX+SMM
emulation is flawed as SMI+RSM should not toouch _any_ VMCS when use the
"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
Fixes: 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220614215831.3762138-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 66c25bb56938..4a53e0c73445 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3379,7 +3379,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
- if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
(!vmx->nested.nested_run_pending ||
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa578398a0ba2c079fa1170da21fa5baae0cedb2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 14 Jun 2022 21:58:27 +0000
Subject: [PATCH] KVM: nVMX: Snapshot pre-VM-Enter BNDCFGS for
!nested_run_pending case
If a nested run isn't pending, snapshot vmcs01.GUEST_BNDCFGS irrespective
of whether or not VM_ENTRY_LOAD_BNDCFGS is set in vmcs12. When restoring
nested state, e.g. after migration, without a nested run pending,
prepare_vmcs02() will propagate nested.vmcs01_guest_bndcfgs to vmcs02,
i.e. will load garbage/zeros into vmcs02.GUEST_BNDCFGS.
If userspace restores nested state before MSRs, then loading garbage is a
non-issue as loading BNDCFGS will also update vmcs02. But if usersepace
restores MSRs first, then KVM is responsible for propagating L2's value,
which is actually thrown into vmcs01, into vmcs02.
Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
is all kinds of bizarre and ideally would not be supported. Sadly, some
VMMs do exactly that and rely on KVM to make things work.
Note, there's still a lurking SMM bug, as propagating vmcs01.GUEST_BNDFGS
to vmcs02 across RSM may corrupt L2's BNDCFGS. But KVM's entire VMX+SMM
emulation is flawed as SMI+RSM should not toouch _any_ VMCS when use the
"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
Fixes: 62cf9bd8118c ("KVM: nVMX: Fix emulation of VM_ENTRY_LOAD_BNDCFGS")
Cc: stable(a)vger.kernel.org
Cc: Lei Wang <lei4.wang(a)intel.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220614215831.3762138-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7d8cd0ebcc75..66c25bb56938 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3382,7 +3382,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
- !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/*