From: Kairui Song <kasong(a)tencent.com>
The helper for shmem swap freeing does not handle the order of swap
entries correctly. It uses xa_cmpxchg_irq to erase the swap entry,
but it reads the entry order beforehand using xa_get_order
without lock protection. As a result the order can be a stale value
if the entry is split after the xa_get_order and before the
xa_cmpxchg_irq. In fact there are more ways for other races to occur
during this time window.
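For illustration only (not part of the patch), a simplified timeline of how
the stale order leads to freeing the wrong number of entries; the actual fix
is in the diff below:

    /*
     * CPU0 (shmem_free_swap)                 CPU1
     * order = xa_get_order(...);             reads a large (say order-4)
     *                                        entry, no lock held
     *                                        entry is split / replaced
     * xa_cmpxchg_irq(index, radswap, NULL);  may still match a single slot
     * swap_put_entries_direct(...,
     *                         1 << order);   puts 1 << stale_order swap
     *                                        entries for what is now a
     *                                        smaller entry
     */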
To fix that, open code the XArray cmpxchg and put the order retrieval and
value checking in the same critical section. Also ensure the order won't
exceed the truncate border.
I observed random swapoff hangs and swap entry leaks when stress
testing ZSWAP with shmem. After applying this patch, the problem is resolved.
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kairui Song <kasong(a)tencent.com>
---
mm/shmem.c | 35 +++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 0b4c8c70d017..e160da0cd30f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -961,18 +961,28 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
* the number of pages being freed. 0 means entry not found in XArray (0 pages
* being freed).
*/
-static long shmem_free_swap(struct address_space *mapping,
- pgoff_t index, void *radswap)
+static long shmem_free_swap(struct address_space *mapping, pgoff_t index,
+ unsigned int max_nr, void *radswap)
{
- int order = xa_get_order(&mapping->i_pages, index);
- void *old;
+ XA_STATE(xas, &mapping->i_pages, index);
+ unsigned int nr_pages = 0;
+ void *entry;
- old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
- if (old != radswap)
- return 0;
- swap_put_entries_direct(radix_to_swp_entry(radswap), 1 << order);
+ xas_lock_irq(&xas);
+ entry = xas_load(&xas);
+ if (entry == radswap) {
+ nr_pages = 1 << xas_get_order(&xas);
+ if (index == round_down(xas.xa_index, nr_pages) && nr_pages < max_nr)
+ xas_store(&xas, NULL);
+ else
+ nr_pages = 0;
+ }
+ xas_unlock_irq(&xas);
+
+ if (nr_pages)
+ swap_put_entries_direct(radix_to_swp_entry(radswap), nr_pages);
- return 1 << order;
+ return nr_pages;
}
/*
@@ -1124,8 +1134,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (xa_is_value(folio)) {
if (unfalloc)
continue;
- nr_swaps_freed += shmem_free_swap(mapping,
- indices[i], folio);
+ nr_swaps_freed += shmem_free_swap(mapping, indices[i],
+ end - indices[i], folio);
continue;
}
@@ -1195,7 +1205,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (unfalloc)
continue;
- swaps_freed = shmem_free_swap(mapping, indices[i], folio);
+ swaps_freed = shmem_free_swap(mapping, indices[i],
+ end - indices[i], folio);
if (!swaps_freed) {
/* Swap was replaced by page: retry */
index = indices[i];
---
base-commit: ab3d40bdac831c67e130fda12f3011505556500f
change-id: 20260111-shmem-swap-fix-8d0e20a14b5d
Best regards,
--
Kairui Song <kasong(a)tencent.com>
From: Björn Töpel <bjorn(a)rivosinc.com>
[ Upstream commit 7d1d19a11cfbfd8bae1d89cc010b2cc397cd0c48 ]
The XOL (execute out-of-line) buffer is used to single-step the
replaced instruction(s) for uprobes. The RISC-V port was missing a
proper fence.i (i$ flushing) after constructing the XOL buffer, which
can result in incorrect execution of stale/broken instructions.
This was found running the BPF selftests "test_progs:
uprobe_autoattach, attach_probe" on the Spacemit K1/X60, where the
uprobes tests randomly blew up.
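For context, a condensed sketch of the pattern being restored (not the full
function, see the diff below): after new instructions are written into memory
that a CPU will later fetch from, the icache must be synchronized with the
dcache before execution, which flush_icache_range() guarantees on RISC-V by
ending up in a fence.i / remote icache flush:

    void *dst = kaddr + (vaddr & ~PAGE_MASK);
    unsigned long start = (unsigned long)dst;

    memcpy(dst, src, len);                   /* write the probe instructions */
    flush_icache_range(start, start + len);  /* sync icache before any fetch */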
Reviewed-by: Guo Ren <guoren(a)kernel.org>
Fixes: 74784081aac8 ("riscv: Add uprobes supported")
Signed-off-by: Björn Töpel <bjorn(a)rivosinc.com>
Link: https://lore.kernel.org/r/20250419111402.1660267-2-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
Signed-off-by: Rahul Sharma <black.hawk(a)163.com>
---
arch/riscv/kernel/probes/uprobes.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
index 4b3dc8beaf77..cc15f7ca6cc1 100644
--- a/arch/riscv/kernel/probes/uprobes.c
+++ b/arch/riscv/kernel/probes/uprobes.c
@@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
/* Initialize the slot */
void *kaddr = kmap_atomic(page);
void *dst = kaddr + (vaddr & ~PAGE_MASK);
+ unsigned long start = (unsigned long)dst;
memcpy(dst, src, len);
@@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
*(uprobe_opcode_t *)dst = __BUG_INSN_32;
}
+ flush_icache_range(start, start + len);
kunmap_atomic(kaddr);
-
- /*
- * We probably need flush_icache_user_page() but it needs vma.
- * This should work on most of architectures by default. If
- * architecture needs to do something different it can define
- * its own version of the function.
- */
- flush_dcache_page(page);
}
--
2.34.1
The patch titled
Subject: kho: init alloc tags when restoring pages from reserved memory
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kho-init-alloc-tags-when-restoring-pages-from-reserved-memory.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Ran Xiaokai <ran.xiaokai(a)zte.com.cn>
Subject: kho: init alloc tags when restoring pages from reserved memory
Date: Fri, 9 Jan 2026 10:42:51 +0000
Memblock pages (including reserved memory) should have their allocation
tags initialized to CODETAG_EMPTY via clear_page_tag_ref() before being
released to the page allocator. When kho restores pages through
kho_restore_page(), missing this call causes mismatched
allocation/deallocation tracking and the warning below:
alloc_tag was not set
WARNING: include/linux/alloc_tag.h:164 at ___free_pages+0xb8/0x260, CPU#1: swapper/0/1
RIP: 0010:___free_pages+0xb8/0x260
kho_restore_vmalloc+0x187/0x2e0
kho_test_init+0x3c4/0xa30
do_one_initcall+0x62/0x2b0
kernel_init_freeable+0x25b/0x480
kernel_init+0x1a/0x1c0
ret_from_fork+0x2d1/0x360
Add the missing clear_page_tag_ref() call in kho_restore_page() to
fix this.
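A minimal sketch of the rule being applied (illustrative only; the helper
names around the actual one-line fix are in the diff below): a page handed to
the buddy allocator that was never allocated through it must not carry a
stale allocation-tag reference, otherwise freeing it trips the
"alloc_tag was not set" warning.

    struct page *page = pfn_to_page(PHYS_PFN(phys));

    /* page comes from KHO-preserved (reserved) memory, not the allocator */
    clear_page_tag_ref(page);                /* tag becomes CODETAG_EMPTY */
    adjust_managed_page_count(page, nr_pages);
    /* page can now be freed via __free_pages() without warning */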
Link: https://lkml.kernel.org/r/20260113033403.161869-1-ranxiaokai627@163.com
Link: https://lkml.kernel.org/r/20260109104251.157767-1-ranxiaokai627@163.com
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Ran Xiaokai <ran.xiaokai(a)zte.com.cn>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb(a)google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Pratyush Yadav <pratyush(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/liveupdate/kexec_handover.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/liveupdate/kexec_handover.c~kho-init-alloc-tags-when-restoring-pages-from-reserved-memory
+++ a/kernel/liveupdate/kexec_handover.c
@@ -255,6 +255,7 @@ static struct page *kho_restore_page(phy
if (is_folio && info.order)
prep_compound_page(page, info.order);
+ clear_page_tag_ref(page);
adjust_managed_page_count(page, nr_pages);
return page;
}
_
Patches currently in -mm which might be from ran.xiaokai(a)zte.com.cn are
kho-init-alloc-tags-when-restoring-pages-from-reserved-memory.patch
This is another one of those XGSPON ONU sticks that use the X-ONU-SFPP
internally, so it also requires the potron quirk to avoid TX faults. Add an
entry for it in sfp_quirks[].
Cc: stable(a)vger.kernel.org
Signed-off-by: Hamza Mahfooz <someguy(a)effective-light.com>
---
drivers/net/phy/sfp.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 84bef5099dda..47f095bd91ce 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -519,6 +519,8 @@ static const struct sfp_quirk sfp_quirks[] = {
SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp),
+ SFP_QUIRK_F("H-COM", "SPP425H-GAB4", sfp_fixup_potron),
+
// HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports
// 2600MBd in their EERPOM
SFP_QUIRK_S("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex),
--
2.52.0
The patch titled
Subject: mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-teach-kill_accessing_process-to-accept-hugetlb-tail-page-pfn.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jane Chu <jane.chu(a)oracle.com>
Subject: mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
Date: Tue, 13 Jan 2026 01:07:51 -0700
When a hugetlb folio is poisoned again, try_memory_failure_hugetlb()
passes the head pfn to kill_accessing_process(), which is not right. The
precise pfn of the poisoned page should be used in order to determine the
precise vaddr for the SIGBUS payload.
This issue has already been taken care of in the normal path, that is,
hwpoison_user_mappings(); see [1][2]. Furthermore, for [3] to work
correctly in the hugetlb repoisoning case, it is essential to inform the
VM of the precise poisoned page, not the head page.
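A worked example of the new calculation in check_hwpoisoned_entry() (the
numbers are illustrative, not from a report): a 2MB hugetlb leaf mapped at
addr whose pte pfn is the head pfn, with the poisoned page being the 5th
subpage, i.e. poisoned_pfn = pfn + 4:

    /* shift == 21 for a 2MB page, so mask == ~511UL (512 pfns per leaf) */
    mask = ~((1UL << (shift - PAGE_SHIFT)) - 1);
    /* (pfn & mask) == (poisoned_pfn & mask)  ->  same huge mapping */
    hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT);
    /*             == addr + 4 * PAGE_SIZE    ->  the exact 4K page,
     *                                            not the huge page head */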
Link: https://lkml.kernel.org/r/20260113080751.2173497-2-jane.chu@oracle.com
Link: https://lkml.kernel.org/r/20231218135837.3310403-1-willy@infradead.org [1]
Link: https://lkml.kernel.org/r/20250224211445.2663312-1-jane.chu@oracle.com [2]
Link: https://lore.kernel.org/lkml/20251116013223.1557158-1-jiaqiyan@google.com/ [3]
Signed-off-by: Jane Chu <jane.chu(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: David Hildenbrand <david(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: William Roche <william.roche(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-teach-kill_accessing_process-to-accept-hugetlb-tail-page-pfn
+++ a/mm/memory-failure.c
@@ -692,6 +692,8 @@ static int check_hwpoisoned_entry(pte_t
unsigned long poisoned_pfn, struct to_kill *tk)
{
unsigned long pfn = 0;
+ unsigned long hwpoison_vaddr;
+ unsigned long mask;
if (pte_present(pte)) {
pfn = pte_pfn(pte);
@@ -702,10 +704,12 @@ static int check_hwpoisoned_entry(pte_t
pfn = softleaf_to_pfn(entry);
}
- if (!pfn || pfn != poisoned_pfn)
+ mask = ~((1UL << (shift - PAGE_SHIFT)) - 1);
+ if (!pfn || ((pfn & mask) != (poisoned_pfn & mask)))
return 0;
- set_to_kill(tk, addr, shift);
+ hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT);
+ set_to_kill(tk, hwpoison_vaddr, shift);
return 1;
}
@@ -2049,10 +2053,8 @@ retry:
return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
case MF_HUGETLB_FOLIO_PRE_POISONED:
case MF_HUGETLB_PAGE_PRE_POISON:
- if (flags & MF_ACTION_REQUIRED) {
- folio = page_folio(p);
- res = kill_accessing_process(current, folio_pfn(folio), flags);
- }
+ if (flags & MF_ACTION_REQUIRED)
+ res = kill_accessing_process(current, pfn, flags);
if (res == MF_HUGETLB_FOLIO_PRE_POISONED)
action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
else
_
Patches currently in -mm which might be from jane.chu(a)oracle.com are
mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison.patch
mm-memory-failure-teach-kill_accessing_process-to-accept-hugetlb-tail-page-pfn.patch
The patch titled
Subject: mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jane Chu <jane.chu(a)oracle.com>
Subject: mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
Date: Tue, 13 Jan 2026 01:07:50 -0700
When a newly poisoned subpage ends up in an already poisoned hugetlb
folio, 'num_poisoned_pages' is incremented, but the per node ->mf_stats is
not. Fix the inconsistency by designating action_result() to update them
both.
While at it, define the __get_huge_page_for_hwpoison() return values in terms
of symbolic names for better readability. Also rename
folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison(), since the
function does more than the conventional bit setting and now has three
possible return values.
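Condensed, the resulting contract (restating the comments added in the diff
below, not new behavior beyond it):

    /*
     * hugetlb_update_hwpoison() returns:
     *   0                              - folio was not poisoned before
     *   MF_HUGETLB_FOLIO_PRE_POISONED  - folio already poisoned, exact page
     *                                    unknown  -> MF_MSG_ALREADY_POISONED
     *   MF_HUGETLB_PAGE_PRE_POISON     - this exact page was poisoned before
     *                                             -> MF_MSG_HUGE
     * Both pre-poisoned cases now go through action_result(), which updates
     * num_poisoned_pages and the per-node ->mf_stats together.
     */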
Link: https://lkml.kernel.org/r/20260113080751.2173497-1-jane.chu@oracle.com
Fixes: 18f41fa616ee4 ("mm: memory-failure: bump memory failure stats to pglist_data")
Signed-off-by: Jane Chu <jane.chu(a)oracle.com>
Cc: David Hildenbrand <david(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Jiaqi Yan <jiaqiyan(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: William Roche <william.roche(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 75 +++++++++++++++++++++++++-----------------
1 file changed, 45 insertions(+), 30 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison
+++ a/mm/memory-failure.c
@@ -1883,12 +1883,24 @@ static unsigned long __folio_free_raw_hw
return count;
}
-static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
+#define MF_HUGETLB_FOLIO_PRE_POISONED 3 /* folio already poisoned */
+#define MF_HUGETLB_PAGE_PRE_POISON 4 /* exact page already poisoned */
+/*
+ * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
+ * to keep track of the poisoned pages.
+ * Return:
+ * 0: folio was not already poisoned;
+ * MF_HUGETLB_FOLIO_PRE_POISONED: folio was already poisoned: either
+ * multiple pages being poisoned, or per page information unclear,
+ * MF_HUGETLB_PAGE_PRE_POISON: folio was already poisoned, an exact
+ * poisoned page is being consumed again.
+ */
+static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
{
struct llist_head *head;
struct raw_hwp_page *raw_hwp;
struct raw_hwp_page *p;
- int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
+ int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;
/*
* Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1908,17 @@ static int folio_set_hugetlb_hwpoison(st
* so skip to add additional raw error info.
*/
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
- return -EHWPOISON;
+ return MF_HUGETLB_FOLIO_PRE_POISONED;
head = raw_hwp_list_head(folio);
llist_for_each_entry(p, head->first, node) {
if (p->page == page)
- return -EHWPOISON;
+ return MF_HUGETLB_PAGE_PRE_POISON;
}
raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
if (raw_hwp) {
raw_hwp->page = page;
llist_add(&raw_hwp->node, head);
- /* the first error event will be counted in action_result(). */
- if (ret)
- num_poisoned_pages_inc(page_to_pfn(page));
} else {
/*
* Failed to save raw error info. We no longer trace all
@@ -1955,44 +1964,43 @@ void folio_clear_hugetlb_hwpoison(struct
folio_free_raw_hwp(folio, true);
}
+#define MF_HUGETLB_FREED 0 /* freed hugepage */
+#define MF_HUGETLB_IN_USED 1 /* in-use hugepage */
/*
* Called from hugetlb code with hugetlb_lock held.
- *
- * Return values:
- * 0 - free hugepage
- * 1 - in-use hugepage
- * 2 - not a hugepage
- * -EBUSY - the hugepage is busy (try to retry)
- * -EHWPOISON - the hugepage is already hwpoisoned
*/
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
struct folio *folio = page_folio(page);
- int ret = 2; /* fallback to normal page handling */
+ int ret = -EINVAL;
bool count_increased = false;
+ int rc;
if (!folio_test_hugetlb(folio))
goto out;
if (flags & MF_COUNT_INCREASED) {
- ret = 1;
+ ret = MF_HUGETLB_IN_USED;
count_increased = true;
} else if (folio_test_hugetlb_freed(folio)) {
- ret = 0;
+ ret = MF_HUGETLB_FREED;
} else if (folio_test_hugetlb_migratable(folio)) {
- ret = folio_try_get(folio);
- if (ret)
+ if (folio_try_get(folio)) {
+ ret = MF_HUGETLB_IN_USED;
count_increased = true;
+ } else
+ ret = MF_HUGETLB_FREED;
} else {
ret = -EBUSY;
if (!(flags & MF_NO_RETRY))
goto out;
}
- if (folio_set_hugetlb_hwpoison(folio, page)) {
- ret = -EHWPOISON;
+ rc = hugetlb_update_hwpoison(folio, page);
+ if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
+ ret = rc;
goto out;
}
@@ -2029,22 +2037,29 @@ static int try_memory_failure_hugetlb(un
*hugetlb = 1;
retry:
res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
- if (res == 2) { /* fallback to normal page handling */
+ switch (res) {
+ case -EINVAL: /* fallback to normal page handling */
*hugetlb = 0;
return 0;
- } else if (res == -EHWPOISON) {
- if (flags & MF_ACTION_REQUIRED) {
- folio = page_folio(p);
- res = kill_accessing_process(current, folio_pfn(folio), flags);
- }
- action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
- return res;
- } else if (res == -EBUSY) {
+ case -EBUSY:
if (!(flags & MF_NO_RETRY)) {
flags |= MF_NO_RETRY;
goto retry;
}
return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
+ case MF_HUGETLB_FOLIO_PRE_POISONED:
+ case MF_HUGETLB_PAGE_PRE_POISON:
+ if (flags & MF_ACTION_REQUIRED) {
+ folio = page_folio(p);
+ res = kill_accessing_process(current, folio_pfn(folio), flags);
+ }
+ if (res == MF_HUGETLB_FOLIO_PRE_POISONED)
+ action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+ else
+ action_result(pfn, MF_MSG_HUGE, MF_FAILED);
+ return res;
+ default:
+ break;
}
folio = page_folio(p);
_
Patches currently in -mm which might be from jane.chu(a)oracle.com are
mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison.patch
mm-memory-failure-teach-kill_accessing_process-to-accept-hugetlb-tail-page-pfn.patch
When a newly poisoned subpage ends up in an already poisoned hugetlb
folio, 'num_poisoned_pages' is incremented, but the per node ->mf_stats
is not. Fix the inconsistency by designating action_result() to update
them both.
While at it, define the __get_huge_page_for_hwpoison() return values in terms
of symbolic names for better readability. Also rename
folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison(), since the
function does more than the conventional bit setting and now has
three possible return values.
Fixes: 18f41fa616ee4 ("mm: memory-failure: bump memory failure stats to pglist_data")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jane Chu <jane.chu(a)oracle.com>
---
v3 -> v4:
incorporate/adapt David's suggestions.
v2 -> v3:
No change.
v1 -> v2:
addressed David's and Liam's comments: define the __get_huge_page_for_hwpoison()
return values in terms of symbolic names instead of naked integers for better
readability. #define is used instead of enum since the function has a footprint
outside MF; this keeps the MF specifics local.
also renamed folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison(),
since the function does more than the conventional bit setting and now has
three possible return values.
---
mm/memory-failure.c | 75 +++++++++++++++++++++++++++------------------
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fbc5a01260c8..b3e27451d618 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1883,12 +1883,24 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
return count;
}
-static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
+#define MF_HUGETLB_FOLIO_PRE_POISONED 3 /* folio already poisoned */
+#define MF_HUGETLB_PAGE_PRE_POISON 4 /* exact page already poisoned */
+/*
+ * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
+ * to keep track of the poisoned pages.
+ * Return:
+ * 0: folio was not already poisoned;
+ * MF_HUGETLB_FOLIO_PRE_POISONED: folio was already poisoned: either
+ * multiple pages being poisoned, or per page information unclear,
+ * MF_HUGETLB_PAGE_PRE_POISON: folio was already poisoned, an exact
+ * poisoned page is being consumed again.
+ */
+static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
{
struct llist_head *head;
struct raw_hwp_page *raw_hwp;
struct raw_hwp_page *p;
- int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
+ int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;
/*
* Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1908,17 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
* so skip to add additional raw error info.
*/
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
- return -EHWPOISON;
+ return MF_HUGETLB_FOLIO_PRE_POISONED;
head = raw_hwp_list_head(folio);
llist_for_each_entry(p, head->first, node) {
if (p->page == page)
- return -EHWPOISON;
+ return MF_HUGETLB_PAGE_PRE_POISON;
}
raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
if (raw_hwp) {
raw_hwp->page = page;
llist_add(&raw_hwp->node, head);
- /* the first error event will be counted in action_result(). */
- if (ret)
- num_poisoned_pages_inc(page_to_pfn(page));
} else {
/*
* Failed to save raw error info. We no longer trace all
@@ -1955,44 +1964,43 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
folio_free_raw_hwp(folio, true);
}
+#define MF_HUGETLB_FREED 0 /* freed hugepage */
+#define MF_HUGETLB_IN_USED 1 /* in-use hugepage */
/*
* Called from hugetlb code with hugetlb_lock held.
- *
- * Return values:
- * 0 - free hugepage
- * 1 - in-use hugepage
- * 2 - not a hugepage
- * -EBUSY - the hugepage is busy (try to retry)
- * -EHWPOISON - the hugepage is already hwpoisoned
*/
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
struct folio *folio = page_folio(page);
- int ret = 2; /* fallback to normal page handling */
+ int ret = -EINVAL;
bool count_increased = false;
+ int rc;
if (!folio_test_hugetlb(folio))
goto out;
if (flags & MF_COUNT_INCREASED) {
- ret = 1;
+ ret = MF_HUGETLB_IN_USED;
count_increased = true;
} else if (folio_test_hugetlb_freed(folio)) {
- ret = 0;
+ ret = MF_HUGETLB_FREED;
} else if (folio_test_hugetlb_migratable(folio)) {
- ret = folio_try_get(folio);
- if (ret)
+ if (folio_try_get(folio)) {
+ ret = MF_HUGETLB_IN_USED;
count_increased = true;
+ } else
+ ret = MF_HUGETLB_FREED;
} else {
ret = -EBUSY;
if (!(flags & MF_NO_RETRY))
goto out;
}
- if (folio_set_hugetlb_hwpoison(folio, page)) {
- ret = -EHWPOISON;
+ rc = hugetlb_update_hwpoison(folio, page);
+ if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
+ ret = rc;
goto out;
}
@@ -2029,22 +2037,29 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
*hugetlb = 1;
retry:
res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
- if (res == 2) { /* fallback to normal page handling */
+ switch (res) {
+ case -EINVAL: /* fallback to normal page handling */
*hugetlb = 0;
return 0;
- } else if (res == -EHWPOISON) {
- if (flags & MF_ACTION_REQUIRED) {
- folio = page_folio(p);
- res = kill_accessing_process(current, folio_pfn(folio), flags);
- }
- action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
- return res;
- } else if (res == -EBUSY) {
+ case -EBUSY:
if (!(flags & MF_NO_RETRY)) {
flags |= MF_NO_RETRY;
goto retry;
}
return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
+ case MF_HUGETLB_FOLIO_PRE_POISONED:
+ case MF_HUGETLB_PAGE_PRE_POISON:
+ if (flags & MF_ACTION_REQUIRED) {
+ folio = page_folio(p);
+ res = kill_accessing_process(current, folio_pfn(folio), flags);
+ }
+ if (res == MF_HUGETLB_FOLIO_PRE_POISONED)
+ action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+ else
+ action_result(pfn, MF_MSG_HUGE, MF_FAILED);
+ return res;
+ default:
+ break;
}
folio = page_folio(p);
--
2.43.5
The patch titled
Subject: mm/kasan: fix KASAN poisoning in vrealloc()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-kasan-fix-kasan-poisoning-in-vrealloc.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Subject: mm/kasan: fix KASAN poisoning in vrealloc()
Date: Tue, 13 Jan 2026 20:15:15 +0100
A KASAN warning can be triggered when vrealloc() changes the requested
size to a value that is not aligned to KASAN_GRANULE_SIZE.
------------[ cut here ]------------
WARNING: CPU: 2 PID: 1 at mm/kasan/shadow.c:174 kasan_unpoison+0x40/0x48
...
pc : kasan_unpoison+0x40/0x48
lr : __kasan_unpoison_vmalloc+0x40/0x68
Call trace:
kasan_unpoison+0x40/0x48 (P)
vrealloc_node_align_noprof+0x200/0x320
bpf_patch_insn_data+0x90/0x2f0
convert_ctx_accesses+0x8c0/0x1158
bpf_check+0x1488/0x1900
bpf_prog_load+0xd20/0x1258
__sys_bpf+0x96c/0xdf0
__arm64_sys_bpf+0x50/0xa0
invoke_syscall+0x90/0x160
Introduce a dedicated kasan_vrealloc() helper that centralizes KASAN
handling for vmalloc reallocations. The helper accounts for KASAN granule
alignment when growing or shrinking an allocation and ensures that partial
granules are handled correctly.
Use this helper from vrealloc_node_align_noprof() to fix the poisoning logic.
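A worked example of what the helper has to handle, assuming generic KASAN
with KASAN_GRANULE_SIZE == 8 (the sizes are illustrative, not from the
report):

    /*
     * Shrink: old_size = 100, new_size = 37
     *   kasan_poison_last_granule(addr, 37)  - bytes 37..39 of the granule
     *                                          at offset 32 become invalid
     *   new_size rounds up to 40, old_size up to 104
     *   poison [addr + 40, addr + 104)       - the freed tail, whole granules
     *
     * Grow: old_size = 37, new_size = 100
     *   old_size rounds down to 32
     *   unpoison [addr + 32, addr + 100)     - re-covers the partial granule
     *                                          plus the newly valid bytes
     */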
Link: https://lkml.kernel.org/r/20260113191516.31015-1-ryabinin.a.a@gmail.com
Fixes: d699440f58ce ("mm: fix vrealloc()'s KASAN poisoning logic")
Signed-off-by: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Reported-by: Maciej Żenczykowski <maze(a)google.com>
Reported-by: <joonki.min(a)samsung-slsi.corp-partner.google.com>
Closes: https://lkml.kernel.org/r/CANP3RGeuRW53vukDy7WDO3FiVgu34-xVJYkfpm08oLO3odYF…
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Uladzislau Rezki <urezki(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/kasan.h | 6 ++++++
mm/kasan/shadow.c | 24 ++++++++++++++++++++++++
mm/vmalloc.c | 7 ++-----
3 files changed, 32 insertions(+), 5 deletions(-)
--- a/include/linux/kasan.h~mm-kasan-fix-kasan-poisoning-in-vrealloc
+++ a/include/linux/kasan.h
@@ -641,6 +641,9 @@ kasan_unpoison_vmap_areas(struct vm_stru
__kasan_unpoison_vmap_areas(vms, nr_vms, flags);
}
+void kasan_vrealloc(const void *start, unsigned long old_size,
+ unsigned long new_size);
+
#else /* CONFIG_KASAN_VMALLOC */
static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -670,6 +673,9 @@ kasan_unpoison_vmap_areas(struct vm_stru
kasan_vmalloc_flags_t flags)
{ }
+static inline void kasan_vrealloc(const void *start, unsigned long old_size,
+ unsigned long new_size) { }
+
#endif /* CONFIG_KASAN_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
--- a/mm/kasan/shadow.c~mm-kasan-fix-kasan-poisoning-in-vrealloc
+++ a/mm/kasan/shadow.c
@@ -651,6 +651,30 @@ void __kasan_poison_vmalloc(const void *
kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
}
+void kasan_vrealloc(const void *addr, unsigned long old_size,
+ unsigned long new_size)
+{
+ if (!kasan_enabled())
+ return;
+
+ if (new_size < old_size) {
+ kasan_poison_last_granule(addr, new_size);
+
+ new_size = round_up(new_size, KASAN_GRANULE_SIZE);
+ old_size = round_up(old_size, KASAN_GRANULE_SIZE);
+ if (new_size < old_size)
+ __kasan_poison_vmalloc(addr + new_size,
+ old_size - new_size);
+ } else if (new_size > old_size) {
+ old_size = round_down(old_size, KASAN_GRANULE_SIZE);
+ __kasan_unpoison_vmalloc(addr + old_size,
+ new_size - old_size,
+ KASAN_VMALLOC_PROT_NORMAL |
+ KASAN_VMALLOC_VM_ALLOC |
+ KASAN_VMALLOC_KEEP_TAG);
+ }
+}
+
#else /* CONFIG_KASAN_VMALLOC */
int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
--- a/mm/vmalloc.c~mm-kasan-fix-kasan-poisoning-in-vrealloc
+++ a/mm/vmalloc.c
@@ -4322,7 +4322,7 @@ void *vrealloc_node_align_noprof(const v
if (want_init_on_free() || want_init_on_alloc(flags))
memset((void *)p + size, 0, old_size - size);
vm->requested_size = size;
- kasan_poison_vmalloc(p + size, old_size - size);
+ kasan_vrealloc(p, old_size, size);
return (void *)p;
}
@@ -4330,16 +4330,13 @@ void *vrealloc_node_align_noprof(const v
* We already have the bytes available in the allocation; use them.
*/
if (size <= alloced_size) {
- kasan_unpoison_vmalloc(p + old_size, size - old_size,
- KASAN_VMALLOC_PROT_NORMAL |
- KASAN_VMALLOC_VM_ALLOC |
- KASAN_VMALLOC_KEEP_TAG);
/*
* No need to zero memory here, as unused memory will have
* already been zeroed at initial allocation time or during
* realloc shrink time.
*/
vm->requested_size = size;
+ kasan_vrealloc(p, old_size, size);
return (void *)p;
}
_
Patches currently in -mm which might be from ryabinin.a.a(a)gmail.com are
mm-kasan-fix-kasan-poisoning-in-vrealloc.patch