The ABI is broken and we cannot support it properly. Turn it off.
If this causes a meaningful performance regression for someone, KVM
can introduce an improved ABI that is supportable.
Cc: stable(a)vger.kernel.org
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
---
arch/x86/kernel/kvm.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 93ab0cbd304e..e6f2aefa298b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -318,11 +318,26 @@ static void kvm_guest_cpu_init(void)
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-#ifdef CONFIG_PREEMPTION
- pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
pa |= KVM_ASYNC_PF_ENABLED;
+ /*
+ * We do not set KVM_ASYNC_PF_SEND_ALWAYS. With the current
+ * KVM paravirt ABI, the following scenario is possible:
+ *
+ * #PF: async page fault (KVM_PV_REASON_PAGE_NOT_PRESENT)
+ * NMI before CR2 or KVM_PF_REASON_PAGE_NOT_PRESENT
+ * NMI accesses user memory, e.g. due to perf
+ * #PF: normal page fault
+ * #PF reads CR2 and apf_reason -- apf_reason should be 0
+ *
+ * outer #PF reads CR2 and apf_reason -- apf_reason should be
+ * KVM_PV_REASON_PAGE_NOT_PRESENT
+ *
+ * There is no possible way that both reads of CR2 and
+ * apf_reason get the correct values. Fixing this would
+ * require paravirt ABI changes.
+ */
+
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
--
2.24.1
I noticed that commit 07928d9bfc81 "padata: Remove broken queue
flushing" has been backported to most stable branches, but commit
6fc4dbcf0276 "padata: Replace delayed timer with immediate workqueue in
padata_reorder" has not.
Is this correct? What prevents the parallel_data ref-count from
dropping to 0 while the timer is scheduled?
Ben.
--
Ben Hutchings
Larkinson's Law: All laws are basically false.
This is a note to let you know that I've just added the patch titled
kobject: Make sure the parent does not get released before its
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 4ef12f7198023c09ad6d25b652bd8748c965c7fa Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Date: Wed, 13 May 2020 18:18:40 +0300
Subject: kobject: Make sure the parent does not get released before its
children
In the function kobject_cleanup(), kobject_del(kobj) is
called before the kobj->release(). That makes it possible to
release the parent of the kobject before the kobject itself.
To fix that, adding function __kboject_del() that does
everything that kobject_del() does except release the parent
reference. kobject_cleanup() then calls __kobject_del()
instead of kobject_del(), and separately decrements the
reference count of the parent kobject after kobj->release()
has been called.
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Reported-by: kernel test robot <rong.a.chen(a)intel.com>
Fixes: 7589238a8cf3 ("Revert "software node: Simplify software_node_release() function"")
Suggested-by: "Rafael J. Wysocki" <rafael(a)kernel.org>
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins(a)google.com>
Tested-by: Brendan Higgins <brendanhiggins(a)google.com>
Acked-by: Randy Dunlap <rdunlap(a)infradead.org>
Link: https://lore.kernel.org/r/20200513151840.36400-1-heikki.krogerus@linux.inte…
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
lib/kobject.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/lib/kobject.c b/lib/kobject.c
index 83198cb37d8d..2bd631460e18 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -599,14 +599,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
}
EXPORT_SYMBOL_GPL(kobject_move);
-/**
- * kobject_del() - Unlink kobject from hierarchy.
- * @kobj: object.
- *
- * This is the function that should be called to delete an object
- * successfully added via kobject_add().
- */
-void kobject_del(struct kobject *kobj)
+static void __kobject_del(struct kobject *kobj)
{
struct kernfs_node *sd;
const struct kobj_type *ktype;
@@ -625,9 +618,23 @@ void kobject_del(struct kobject *kobj)
kobj->state_in_sysfs = 0;
kobj_kset_leave(kobj);
- kobject_put(kobj->parent);
kobj->parent = NULL;
}
+
+/**
+ * kobject_del() - Unlink kobject from hierarchy.
+ * @kobj: object.
+ *
+ * This is the function that should be called to delete an object
+ * successfully added via kobject_add().
+ */
+void kobject_del(struct kobject *kobj)
+{
+ struct kobject *parent = kobj->parent;
+
+ __kobject_del(kobj);
+ kobject_put(parent);
+}
EXPORT_SYMBOL(kobject_del);
/**
@@ -663,6 +670,7 @@ EXPORT_SYMBOL(kobject_get_unless_zero);
*/
static void kobject_cleanup(struct kobject *kobj)
{
+ struct kobject *parent = kobj->parent;
struct kobj_type *t = get_ktype(kobj);
const char *name = kobj->name;
@@ -684,7 +692,7 @@ static void kobject_cleanup(struct kobject *kobj)
if (kobj->state_in_sysfs) {
pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n",
kobject_name(kobj), kobj);
- kobject_del(kobj);
+ __kobject_del(kobj);
}
if (t && t->release) {
@@ -698,6 +706,8 @@ static void kobject_cleanup(struct kobject *kobj)
pr_debug("kobject: '%s': free name\n", name);
kfree_const(name);
}
+
+ kobject_put(parent);
}
#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
--
2.26.2
This is a note to let you know that I've just added the patch titled
driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 44e960490ddf868fc9135151c4a658936e771dc2 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak(a)google.com>
Date: Tue, 19 May 2020 21:36:26 -0700
Subject: driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links
Commit 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link
implementation") didn't completely fix STATELESS + SYNC_STATE_ONLY
handling.
What looks like an optimization in that commit is actually a bug that
causes an if condition to always take the else path. This prevents
reordering of devices in the dpm_list when a DL_FLAG_STATELESS device
link is create on top of an existing DL_FLAG_SYNC_STATE_ONLY device
link.
Fixes: 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link implementation")
Signed-off-by: Saravana Kannan <saravanak(a)google.com>
Cc: stable <stable(a)vger.kernel.org>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Link: https://lore.kernel.org/r/20200520043626.181820-1-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/base/core.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4e0e430315d9..0cad34f1eede 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -360,12 +360,14 @@ struct device_link *device_link_add(struct device *consumer,
if (flags & DL_FLAG_STATELESS) {
kref_get(&link->kref);
- link->flags |= DL_FLAG_STATELESS;
if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
- !(link->flags & DL_FLAG_STATELESS))
+ !(link->flags & DL_FLAG_STATELESS)) {
+ link->flags |= DL_FLAG_STATELESS;
goto reorder;
- else
+ } else {
+ link->flags |= DL_FLAG_STATELESS;
goto out;
+ }
}
/*
--
2.26.2
From: Alexander Duyck <alexander.h.duyck(a)linux.intel.com>
We are seeing a deadlock in e1000 down when NAPI is being disabled. Looking
over the kernel function trace of the system it appears that the interface
is being closed and then a reset is hitting which deadlocks the interface
as the NAPI interface is already disabled.
To prevent this from happening I am disabling the reset task when
__E1000_DOWN is already set. In addition code has been added so that we set
the __E1000_DOWN while holding the __E1000_RESET flag in e1000_close in
order to guarantee that the reset task will not run after we have started
the close call.
CC: stable <stable(a)vger.kernel.org>
Signed-off-by: Alexander Duyck <alexander.h.duyck(a)linux.intel.com>
Tested-by: Maxim Zhukov <mussitantesmortem(a)gmail.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher(a)intel.com>
---
drivers/net/ethernet/intel/e1000/e1000_main.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 05bc6e216bca..d9fa4600f745 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -542,8 +542,13 @@ void e1000_reinit_locked(struct e1000_adapter *adapter)
WARN_ON(in_interrupt());
while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
msleep(1);
- e1000_down(adapter);
- e1000_up(adapter);
+
+ /* only run the task if not already down */
+ if (!test_bit(__E1000_DOWN, &adapter->flags)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+
clear_bit(__E1000_RESETTING, &adapter->flags);
}
@@ -1433,10 +1438,15 @@ int e1000_close(struct net_device *netdev)
struct e1000_hw *hw = &adapter->hw;
int count = E1000_CHECK_RESET_COUNT;
- while (test_bit(__E1000_RESETTING, &adapter->flags) && count--)
+ while (test_and_set_bit(__E1000_RESETTING, &adapter->flags) && count--)
usleep_range(10000, 20000);
- WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags));
+ WARN_ON(count < 0);
+
+ /* signal that we're down so that the reset task will no longer run */
+ set_bit(__E1000_DOWN, &adapter->flags);
+ clear_bit(__E1000_RESETTING, &adapter->flags);
+
e1000_down(adapter);
e1000_power_down_phy(adapter);
e1000_free_irq(adapter);
--
2.26.2
Please pick this fix for 4.19 and 5.4 stable branches:
commit d80b64ff297e40c2b6f7d7abc1b3eba70d22a068
Author: Miaohe Lin <linmiaohe(a)huawei.com>
Date: Sat Jan 4 16:56:49 2020 +0800
KVM: SVM: Fix potential memory leak in svm_cpu_init()
It applies cleanly to both.
Ben.
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom
Please queue up the attached backport of commit 2351c88f8296 "watchdog:
Fix the race between the release of watchdog_core_data and cdev" for
4.14.
Ben.
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom