The patch titled
Subject: mm/hwpoison: do not lock page again when me_huge_page() successfully recovers
has been added to the -mm tree. Its filename is
mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Subject: mm/hwpoison: do not lock page again when me_huge_page() successfully recovers
Currently me_huge_page() temporarily unlocks the page to perform some
actions, then locks it again later. My testcase (which calls hard-offline
on some tail page in a hugetlb, then accesses the address of the hugetlb
range) showed that the page allocation code detects this page lock on a
buddy page and prints out a "BUG: Bad page state" message.
check_new_page_bad() does not consider a page with __PG_HWPOISON as a bad
page, so this flag works as a kind of filter, but this filtering doesn't
work in this case because the "bad page" is not the actual hwpoisoned
page. So stop locking the page again. Actions to be taken depend on the
page type of the error, so page unlocking should be done in ->action()
callbacks. So let's make that the assumed convention and change all
existing callbacks accordingly.
Link: https://lkml.kernel.org/r/20210609072029.74645-1-nao.horiguchi@gmail.com
Fixes: commit 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 44 ++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
--- a/mm/memory-failure.c~mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers
+++ a/mm/memory-failure.c
@@ -658,6 +658,7 @@ static int truncate_error_page(struct pa
*/
static int me_kernel(struct page *p, unsigned long pfn)
{
+ unlock_page(p);
return MF_IGNORED;
}
@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, uns
static int me_unknown(struct page *p, unsigned long pfn)
{
pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+ unlock_page(p);
return MF_FAILED;
}
@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, un
*/
static int me_pagecache_clean(struct page *p, unsigned long pfn)
{
+ int ret;
struct address_space *mapping;
delete_from_lru_cache(p);
@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct pag
* For anonymous pages we're done the only reference left
* should be the one m_f() holds.
*/
- if (PageAnon(p))
- return MF_RECOVERED;
+ if (PageAnon(p)) {
+ ret = MF_RECOVERED;
+ goto out;
+ }
/*
* Now truncate the page in the page cache. This is really
@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct pag
/*
* Page has been teared down in the meanwhile
*/
- return MF_FAILED;
+ ret = MF_FAILED;
+ goto out;
}
/*
@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct pag
*
* Open: to take i_mutex or not for this? Right now we don't.
*/
- return truncate_error_page(p, pfn, mapping);
+ ret = truncate_error_page(p, pfn, mapping);
+out:
+ unlock_page(p);
+ return ret;
}
/*
@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct pag
*/
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
+ int ret;
+
ClearPageDirty(p);
/* Trigger EIO in shmem: */
ClearPageUptodate(p);
- if (!delete_from_lru_cache(p))
- return MF_DELAYED;
- else
- return MF_FAILED;
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+ unlock_page(p);
+ return ret;
}
static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
+ int ret;
+
delete_from_swap_cache(p);
- if (!delete_from_lru_cache(p))
- return MF_RECOVERED;
- else
- return MF_FAILED;
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+ unlock_page(p);
+ return ret;
}
/*
@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p,
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, pfn, mapping);
+ unlock_page(hpage);
} else {
res = MF_FAILED;
unlock_page(hpage);
@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p,
page_ref_inc(p);
res = MF_RECOVERED;
}
- lock_page(hpage);
}
return res;
@@ -866,6 +877,8 @@ static struct page_state {
unsigned long mask;
unsigned long res;
enum mf_action_page_type type;
+
+ /* Callback ->action() has to unlock the relevant page inside it. */
int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
{ reserved, reserved, MF_MSG_KERNEL, me_kernel },
@@ -929,6 +942,7 @@ static int page_action(struct page_state
int result;
int count;
+ /* page p should be unlocked after returning from ps->action(). */
result = ps->action(p, pfn);
count = page_count(p) - 1;
@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsign
goto out;
}
- res = identify_page_state(pfn, p, page_flags);
+ return identify_page_state(pfn, p, page_flags);
out:
unlock_page(head);
return res;
@@ -1596,6 +1610,8 @@ try_again:
identify_page_state:
res = identify_page_state(pfn, p, page_flags);
+ mutex_unlock(&mf_mutex);
+ return res;
unlock_page:
unlock_page(p);
unlock_mutex:
_
Patches currently in -mm which might be from naoya.horiguchi(a)nec.com are
mmhwpoison-fix-race-with-hugetlb-page-allocation.patch
mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
mmhwpoison-send-sigbus-with-error-virutal-address.patch
mmhwpoison-send-sigbus-with-error-virutal-address-fix.patch
mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch
The rproc_char_device_remove() call currently unmaps the cdev
region instead of simply deleting the cdev that was added as a
part of the rproc_char_device_add() call. This change fixes that
behaviour, and also fixes the order in which device_del() and
cdev_del() need to be called.
Signed-off-by: Siddharth Gupta <sidgup(a)codeaurora.org>
Cc: stable(a)vger.kernel.org
---
drivers/remoteproc/remoteproc_cdev.c | 2 +-
drivers/remoteproc/remoteproc_core.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
index 0b8a84c..4ad98b0 100644
--- a/drivers/remoteproc/remoteproc_cdev.c
+++ b/drivers/remoteproc/remoteproc_cdev.c
@@ -124,7 +124,7 @@ int rproc_char_device_add(struct rproc *rproc)
void rproc_char_device_remove(struct rproc *rproc)
{
- __unregister_chrdev(MAJOR(rproc->dev.devt), rproc->index, 1, "remoteproc");
+ cdev_del(&rproc->cdev);
}
void __init rproc_init_cdev(void)
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index b65fce3..b874280 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -2619,7 +2619,6 @@ int rproc_del(struct rproc *rproc)
mutex_unlock(&rproc->lock);
rproc_delete_debug_dir(rproc);
- rproc_char_device_remove(rproc);
/* the rproc is downref'ed as soon as it's removed from the klist */
mutex_lock(&rproc_list_mutex);
@@ -2630,6 +2629,7 @@ int rproc_del(struct rproc *rproc)
synchronize_rcu();
device_del(&rproc->dev);
+ rproc_char_device_remove(rproc);
return 0;
}
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
We can validate whether the remoteproc is correctly setup before
making the cdev_add and device_add calls. This saves us the
trouble of cleaning up later on.
Signed-off-by: Siddharth Gupta <sidgup(a)codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Cc: stable(a)vger.kernel.org
---
drivers/remoteproc/remoteproc_core.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 9ad8c5f..b65fce3 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -2333,16 +2333,16 @@ int rproc_add(struct rproc *rproc)
struct device *dev = &rproc->dev;
int ret;
- /* add char device for this remoteproc */
- ret = rproc_char_device_add(rproc);
+ ret = rproc_validate(rproc);
if (ret < 0)
return ret;
- ret = device_add(dev);
+ /* add char device for this remoteproc */
+ ret = rproc_char_device_add(rproc);
if (ret < 0)
return ret;
- ret = rproc_validate(rproc);
+ ret = device_add(dev);
if (ret < 0)
return ret;
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
When cdev_add is called after device_add has been called there is no
way for the userspace to know about the addition of a cdev as cdev_add
itself doesn't trigger a uevent notification, or for the kernel to
know about the change to devt. This results in two problems:
- mknod is never called for the cdev and hence no cdev appears on
devtmpfs.
- sysfs links to the new cdev are not established.
The cdev needs to be added and devt assigned before device_add() is
called in order for the relevant sysfs and devtmpfs entries to be
created and the uevent to be properly populated.
Signed-off-by: Siddharth Gupta <sidgup(a)codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Cc: stable(a)vger.kernel.org
---
drivers/remoteproc/remoteproc_core.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 6348aaa..9ad8c5f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -2333,6 +2333,11 @@ int rproc_add(struct rproc *rproc)
struct device *dev = &rproc->dev;
int ret;
+ /* add char device for this remoteproc */
+ ret = rproc_char_device_add(rproc);
+ if (ret < 0)
+ return ret;
+
ret = device_add(dev);
if (ret < 0)
return ret;
@@ -2346,11 +2351,6 @@ int rproc_add(struct rproc *rproc)
/* create debugfs entries */
rproc_create_debug_dir(rproc);
- /* add char device for this remoteproc */
- ret = rproc_char_device_add(rproc);
- if (ret < 0)
- return ret;
-
/* if rproc is marked always-on, request it to boot */
if (rproc->auto_boot) {
ret = rproc_trigger_auto_boot(rproc);
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Hello all,
with commit "mount-util: shortcut things after generating top-level bind
mount" c2c331056a7c331a5478124b3cd6a34c9f539839
(5c5753b9ea5cc012586ae90d357d460dec4301a4 in master) systemd 248.2 and
later won't boot on the 4.19 kernel series. I tested 4.19.194 so far.
Lennart's guess is that this is the mount table brokenness on old
kernels that newer libmount worked around, i.e. /proc/self/mountinfo on
old kernels showed partially old data and partially new data, and
confused the heck out of everyone. "proc on 20" with mount options
"(rw,25)" doesn't look right at all.
If we look at 4.14 and 4.4 kernel series, those boot. 4.9 I've still to
test, as that one didn't boot either for me.
So, any idea what is missing in the 4.19 kernel series? Other kernels work.
Systemd issue is this one: https://github.com/systemd/systemd/issues/19926
--
Best, Philip
Hi,
The commit 591a22c14d3f ("proc: Track /proc/$pid/attr/ opener mm_struct")
that we got in v5.13-rc6 broke our regression to pieces. The NIC interfaces
fail to start when using NetworkManager.
There is nothing in dmesg except an error that NetworkManager failed to start.
Our setups are:
* VMs with virtio-net NICs
* Fedora 29
The revert fixes the issue and VMs boot with network working.
Thanks