'Commit cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during
shutdown")' has been added to kernel to shutdown pending PCIe port
service interrupts during reboot so that a newly started kexec kernel
wouldn't observe pending interrupts.
pcie_port_device_remove() is disabling the root port and switches by
calling pci_disable_device() after all PCIe service drivers are shutdown.
pci_disable_device() has a much wider impact then port service itself and
it prevents all inbound transactions to reach to the system and impacts
the entire PCI traffic behind the bridge.
Issue is that pcie_port_device_remove() doesn't maintain any coordination
with the rest of the PCI device drivers in the system before clearing the
bus master bit.
This has been found to cause crashes on HP DL360 Gen9 machines during
reboot. Besides, kexec is already clearing the bus master bit in
pci_device_shutdown() after all PCI drivers are removed.
Just remove the extra clear in shutdown path by seperating the remove and
shutdown APIs in the PORTDRV.
Signed-off-by: Sinan Kaya <okaya(a)codeaurora.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199779
Fixes: cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown")
Cc: stable(a)vger.kernel.org
Reported-by: Ryan Finnie <ryan(a)finnie.org>
---
drivers/pci/pcie/portdrv.h | 2 +-
drivers/pci/pcie/portdrv_core.c | 5 +++--
drivers/pci/pcie/portdrv_pci.c | 16 +++++++++++++---
3 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index d0c6783..f6e88fe 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -86,7 +86,7 @@ int pcie_port_device_register(struct pci_dev *dev);
int pcie_port_device_suspend(struct device *dev);
int pcie_port_device_resume(struct device *dev);
#endif
-void pcie_port_device_remove(struct pci_dev *dev);
+void pcie_port_device_remove(struct pci_dev *dev, bool disable);
int __must_check pcie_port_bus_register(void);
void pcie_port_bus_unregister(void);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index c9c0663..f35341e 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -405,11 +405,12 @@ static int remove_iter(struct device *dev, void *data)
* Remove PCI Express port service devices associated with given port and
* disable MSI-X or MSI for the port.
*/
-void pcie_port_device_remove(struct pci_dev *dev)
+void pcie_port_device_remove(struct pci_dev *dev, bool disable)
{
device_for_each_child(&dev->dev, NULL, remove_iter);
pci_free_irq_vectors(dev);
- pci_disable_device(dev);
+ if (disable)
+ pci_disable_device(dev);
}
/**
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 973f1b8..29afaf3 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -137,7 +137,7 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
return 0;
}
-static void pcie_portdrv_remove(struct pci_dev *dev)
+static void __pcie_portdrv_remove(struct pci_dev *dev, bool disable)
{
if (pci_bridge_d3_possible(dev)) {
pm_runtime_forbid(&dev->dev);
@@ -145,7 +145,17 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
pm_runtime_dont_use_autosuspend(&dev->dev);
}
- pcie_port_device_remove(dev);
+ pcie_port_device_remove(dev, disable);
+}
+
+static void pcie_portdrv_remove(struct pci_dev *dev)
+{
+ return __pcie_portdrv_remove(dev, true);
+}
+
+static void pcie_portdrv_shutdown(struct pci_dev *dev)
+{
+ return __pcie_portdrv_remove(dev, false);
}
static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
@@ -218,7 +228,7 @@ static struct pci_driver pcie_portdriver = {
.probe = pcie_portdrv_probe,
.remove = pcie_portdrv_remove,
- .shutdown = pcie_portdrv_remove,
+ .shutdown = pcie_portdrv_shutdown,
.err_handler = &pcie_portdrv_err_handler,
--
2.7.4
It is up to a driver to implement shutdown() callback. If shutdown()
callback is not implemented, PCI device can have pending interrupt and
even do DMA transactions while the system is going down.
If kexec is in use, this can damage the newly booting kexec kernel
or even prevent it from booting altogether. Fallback to calling the
remove() callback if shutdown() isn't implemented for a given driver.
Signed-off-by: Sinan Kaya <okaya(a)codeaurora.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199779
Fixes: cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown")
Cc: stable(a)vger.kernel.org
Reported-by: Ryan Finnie <ryan(a)finnie.org>
---
drivers/pci/pci-driver.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index cbda0e6..75a00fe 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -477,8 +477,17 @@ static void pci_device_shutdown(struct device *dev)
pm_runtime_resume(dev);
+ /*
+ * Try shutdown callback if it exists, otherwise fallback to remove
+ * callback. PCI drivers can do DMA and have pending interrupts.
+ * Leaving the DMA and interrupts pending could damage the newly
+ * booting kexec kernel as well as prevent it from booting altogether
+ * if the pending interrupt is level.
+ */
if (drv && drv->shutdown)
drv->shutdown(pci_dev);
+ else if (drv && drv->remove)
+ drv->remove(pci_dev);
/*
* If this is a kexec reboot, turn off Bus Master bit on the
--
2.7.4
The patch titled
Subject: ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry
has been added to the -mm tree. Its filename is
ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/ocfs2-fix-a-misuse-a-of-brelse-aft…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-fix-a-misuse-a-of-brelse-aft…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Changwei Ge <ge.changwei(a)h3c.com>
Subject: ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry
Somehow, file system metadata was corrupted, which causes
ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el().
According to the original design intention, if above happens we should
skip the problematic block and continue to retrieve dir entry. But there
is obviouse misuse of brelse around related code.
After failure of ocfs2_check_dir_entry(), currunt code just moves to next
position and uses the problematic buffer head again and again during which
the problematic buffer head is released for multiple times. I suppose,
this a serious issue which is long-lived in ocfs2. This may cause other
file systems which is also used in a the same host insane.
So we should also consider about bakcporting this patch into
linux -stable.
Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB04…
Signed-off-by: Changwei Ge <ge.changwei(a)h3c.com>
Suggested-by: Changkuo Shi <shi.changkuo(a)h3c.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Joseph Qi <jiangqi903(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/dir.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff -puN fs/ocfs2/dir.c~ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry fs/ocfs2/dir.c
--- a/fs/ocfs2/dir.c~ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry
+++ a/fs/ocfs2/dir.c
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(stru
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
- brelse(bh);
- continue;
+ break;
}
if (le64_to_cpu(de->inode)) {
unsigned char d_type = DT_UNKNOWN;
_
Patches currently in -mm which might be from ge.changwei(a)h3c.com are
ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
ocfs2-dont-put-and-assign-null-to-bh-allocated-outside.patch
ocfs2-dont-use-iocb-when-eiocbqueued-returns.patch
The patch titled
Subject: kasan: fix memory hotplug during boot
has been removed from the -mm tree. Its filename was
kasan-fix-memory-hotplug-during-boot.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: kasan: fix memory hotplug during boot
Using module_init() is wrong. E.g. ACPI adds and onlines memory before
our memory notifier gets registered.
This makes sure that ACPI memory detected during boot up will not result
in a kernel crash.
Easily reproducible with QEMU, just specify a DIMM when starting up.
Link: http://lkml.kernel.org/r/20180522100756.18478-3-david@redhat.com
Fixes: 786a8959912e ("kasan: disable memory hotplug")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Andrey Ryabinin <aryabinin(a)virtuozzo.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/kasan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN mm/kasan/kasan.c~kasan-fix-memory-hotplug-during-boot mm/kasan/kasan.c
--- a/mm/kasan/kasan.c~kasan-fix-memory-hotplug-during-boot
+++ a/mm/kasan/kasan.c
@@ -898,5 +898,5 @@ static int __init kasan_memhotplug_init(
return 0;
}
-module_init(kasan_memhotplug_init);
+core_initcall(kasan_memhotplug_init);
#endif
_
Patches currently in -mm which might be from david(a)redhat.com are
The patch titled
Subject: kasan: free allocated shadow memory on MEM_CANCEL_ONLINE
has been removed from the -mm tree. Its filename was
kasan-free-allocated-shadow-memory-on-mem_cancel_online.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: kasan: free allocated shadow memory on MEM_CANCEL_ONLINE
We have to free memory again when we cancel onlining, otherwise a later
onlining attempt will fail.
Link: http://lkml.kernel.org/r/20180522100756.18478-2-david@redhat.com
Fixes: fa69b5989bb0 ("mm/kasan: add support for memory hotplug")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Andrey Ryabinin <aryabinin(a)virtuozzo.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/kasan.c | 1 +
1 file changed, 1 insertion(+)
diff -puN mm/kasan/kasan.c~kasan-free-allocated-shadow-memory-on-mem_cancel_online mm/kasan/kasan.c
--- a/mm/kasan/kasan.c~kasan-free-allocated-shadow-memory-on-mem_cancel_online
+++ a/mm/kasan/kasan.c
@@ -866,6 +866,7 @@ static int __meminit kasan_mem_notifier(
kmemleak_ignore(ret);
return NOTIFY_OK;
}
+ case MEM_CANCEL_ONLINE:
case MEM_OFFLINE: {
struct vm_struct *vm;
_
Patches currently in -mm which might be from david(a)redhat.com are
The patch titled
Subject: kernel/sys.c: fix potential Spectre v1 issue
has been removed from the -mm tree. Its filename was
kernel-sys-fix-potential-spectre-v1.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Gustavo A. R. Silva" <gustavo(a)embeddedor.com>
Subject: kernel/sys.c: fix potential Spectre v1 issue
`resource' can be controlled by user-space, hence leading to a potential
exploitation of the Spectre variant 1 vulnerability.
This issue was detected with the help of Smatch:
kernel/sys.c:1474 __do_compat_sys_old_getrlimit() warn: potential
spectre issue 'get_current()->signal->rlim' (local cap)
kernel/sys.c:1455 __do_sys_old_getrlimit() warn: potential spectre issue
'get_current()->signal->rlim' (local cap)
Fix this by sanitizing *resource* before using it to index
current->signal->rlim
Notice that given that speculation windows are large, the policy is to
kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].
[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2
Link: http://lkml.kernel.org/r/20180515030038.GA11822@embeddedor.com
Signed-off-by: Gustavo A. R. Silva <gustavo(a)embeddedor.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Alexei Starovoitov <ast(a)kernel.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sys.c | 5 +++++
1 file changed, 5 insertions(+)
diff -puN kernel/sys.c~kernel-sys-fix-potential-spectre-v1 kernel/sys.c
--- a/kernel/sys.c~kernel-sys-fix-potential-spectre-v1
+++ a/kernel/sys.c
@@ -71,6 +71,9 @@
#include <asm/io.h>
#include <asm/unistd.h>
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
#include "uid16.h"
#ifndef SET_UNALIGN_CTL
@@ -1453,6 +1456,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned
if (resource >= RLIM_NLIMITS)
return -EINVAL;
+ resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
x = current->signal->rlim[resource];
task_unlock(current->group_leader);
@@ -1472,6 +1476,7 @@ COMPAT_SYSCALL_DEFINE2(old_getrlimit, un
if (resource >= RLIM_NLIMITS)
return -EINVAL;
+ resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
r = current->signal->rlim[resource];
task_unlock(current->group_leader);
_
Patches currently in -mm which might be from gustavo(a)embeddedor.com are