The quilt patch titled
Subject: mm/memblock: correct totalram_pages accounting with KMSAN
has been removed from the -mm tree. Its filename was
mm-memblock-correct-totalram_pages-accounting-with-kmsan.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Alexander Potapenko <glider(a)google.com>
Subject: mm/memblock: correct totalram_pages accounting with KMSAN
Date: Wed, 24 Sep 2025 12:03:01 +0200
When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
for metadata instead of returning them to the early allocator. The
callers, however, would unconditionally increment `totalram_pages`,
assuming the pages were always freed. This resulted in an incorrect
calculation of the total available RAM, causing the kernel to believe it
had more memory than it actually did.
This patch refactors `memblock_free_pages()` to return the number of pages
it successfully frees. If KMSAN stashes the pages, the function now
returns 0; otherwise, it returns the number of pages in the block.
The callers in `memblock.c` have been updated to use this return value,
ensuring that `totalram_pages` is incremented only by the number of pages
actually returned to the allocator. This corrects the total RAM
accounting when KMSAN is active.
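In condensed form (a sketch distilled from the hunks below, with the
deferred-init check and page tag handling omitted), the new contract and
its use look like this:

	/* Returns the number of pages actually freed; 0 if KMSAN kept them. */
	unsigned long __init memblock_free_pages(struct page *page,
						 unsigned long pfn,
						 unsigned int order)
	{
		if (!kmsan_memblock_free_pages(page, order))
			return 0;	/* KMSAN stashed the block for metadata */
		__free_pages_core(page, order, MEMINIT_EARLY);
		return 1UL << order;
	}

	/* Callers accumulate the return value instead of assuming success: */
	for (; cursor < end; cursor++)
		freed_pages += memblock_free_pages(pfn_to_page(cursor), cursor, 0);
	totalram_pages_add(freed_pages);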
Link: https://lkml.kernel.org/r/20250924100301.1558645-1-glider@google.com
Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
Signed-off-by: Alexander Potapenko <glider(a)google.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Aleksandr Nogikh <nogikh(a)google.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Markus Elfring <Markus.Elfring(a)web.de>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/internal.h | 4 ++--
mm/memblock.c | 21 +++++++++++----------
mm/mm_init.c | 9 +++++----
3 files changed, 18 insertions(+), 16 deletions(-)
--- a/mm/internal.h~mm-memblock-correct-totalram_pages-accounting-with-kmsan
+++ a/mm/internal.h
@@ -742,8 +742,8 @@ static inline void clear_zone_contiguous
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
-extern void memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order);
+unsigned long memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order,
enum meminit_context context);
--- a/mm/memblock.c~mm-memblock-correct-totalram_pages-accounting-with-kmsan
+++ a/mm/memblock.c
@@ -1826,6 +1826,7 @@ void *__init __memblock_alloc_or_panic(p
void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
{
phys_addr_t cursor, end;
+ unsigned long freed_pages = 0;
end = base + size - 1;
memblock_dbg("%s: [%pa-%pa] %pS\n",
@@ -1834,10 +1835,9 @@ void __init memblock_free_late(phys_addr
cursor = PFN_UP(base);
end = PFN_DOWN(base + size);
- for (; cursor < end; cursor++) {
- memblock_free_pages(pfn_to_page(cursor), cursor, 0);
- totalram_pages_inc();
- }
+ for (; cursor < end; cursor++)
+ freed_pages += memblock_free_pages(pfn_to_page(cursor), cursor, 0);
+ totalram_pages_add(freed_pages);
}
/*
@@ -2259,9 +2259,11 @@ static void __init free_unused_memmap(vo
#endif
}
-static void __init __free_pages_memory(unsigned long start, unsigned long end)
+static unsigned long __init __free_pages_memory(unsigned long start,
+ unsigned long end)
{
int order;
+ unsigned long freed = 0;
while (start < end) {
/*
@@ -2279,14 +2281,15 @@ static void __init __free_pages_memory(u
while (start + (1UL << order) > end)
order--;
- memblock_free_pages(pfn_to_page(start), start, order);
+ freed += memblock_free_pages(pfn_to_page(start), start, order);
start += (1UL << order);
}
+ return freed;
}
static unsigned long __init __free_memory_core(phys_addr_t start,
- phys_addr_t end)
+ phys_addr_t end)
{
unsigned long start_pfn = PFN_UP(start);
unsigned long end_pfn = PFN_DOWN(end);
@@ -2297,9 +2300,7 @@ static unsigned long __init __free_memor
if (start_pfn >= end_pfn)
return 0;
- __free_pages_memory(start_pfn, end_pfn);
-
- return end_pfn - start_pfn;
+ return __free_pages_memory(start_pfn, end_pfn);
}
static void __init memmap_init_reserved_pages(void)
--- a/mm/mm_init.c~mm-memblock-correct-totalram_pages-accounting-with-kmsan
+++ a/mm/mm_init.c
@@ -2547,24 +2547,25 @@ void *__init alloc_large_system_hash(con
return table;
}
-void __init memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order)
+unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order)
{
if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
int nid = early_pfn_to_nid(pfn);
if (!early_page_initialised(pfn, nid))
- return;
+ return 0;
}
if (!kmsan_memblock_free_pages(page, order)) {
/* KMSAN will take care of these pages. */
- return;
+ return 0;
}
/* pages were reserved and not allocated */
clear_page_tag_ref(page);
__free_pages_core(page, order, MEMINIT_EARLY);
+ return 1UL << order;
}
DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
_
Patches currently in -mm which might be from glider(a)google.com are
The patch titled
Subject: hugetlbfs: skip VMAs without shareable locks in hugetlb_vmdelete_list
has been added to the -mm mm-new branch. Its filename is
hugetlbfs-skip-vmas-without-shareable-locks-in-hugetlb_vmdelete_list.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Deepanshu Kartikey <kartikey406(a)gmail.com>
Subject: hugetlbfs: skip VMAs without shareable locks in hugetlb_vmdelete_list
Date: Thu, 25 Sep 2025 20:19:32 +0530
hugetlb_vmdelete_list() uses trylock to acquire VMA locks during truncate
operations. As per the original design in commit 40549ba8f8e0 ("hugetlb:
use new vma_lock for pmd sharing synchronization"), if the trylock fails
or the VMA has no lock, it should skip that VMA. Any remaining mapped
pages are handled by remove_inode_hugepages() which is called after
hugetlb_vmdelete_list() and uses proper lock ordering to guarantee
unmapping success.
Currently, when hugetlb_vma_trylock_write() returns success (1) for VMAs
without shareable locks, the code proceeds to call unmap_hugepage_range().
This causes assertion failures in huge_pmd_unshare() ->
hugetlb_vma_assert_locked() because no lock is actually held:
WARNING: CPU: 1 PID: 6594 Comm: syz.0.28 Not tainted
Call Trace:
hugetlb_vma_assert_locked+0x1dd/0x250
huge_pmd_unshare+0x2c8/0x540
__unmap_hugepage_range+0x6e3/0x1aa0
unmap_hugepage_range+0x32e/0x410
hugetlb_vmdelete_list+0x189/0x1f0
Fix by explicitly skipping VMAs without shareable locks after trylock
succeeds, consistent with the original design where such VMAs are deferred
to remove_inode_hugepages() for proper handling.
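For context, the reason the trylock can "succeed" without taking anything
is visible in the helper (paraphrased from mm/hugetlb.c; exact code may
differ between kernel versions):

	int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
	{
		if (__vma_shareable_lock(vma)) {
			struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

			return down_write_trylock(&vma_lock->rw_sema);
		} else if (__vma_private_lock(vma)) {
			struct resv_map *resv_map = vma_resv_map(vma);

			return down_write_trylock(&resv_map->rw_sema);
		}

		/* No lock to take: report success while holding nothing. */
		return 1;
	}

Hence the explicit __vma_shareable_lock() check added in the hunk below.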
Link: https://lkml.kernel.org/r/20250925144934.150299-1-kartikey406@gmail.com
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
Reported-by: syzbot+f26d7c75c26ec19790e7(a)syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=f26d7c75c26ec19790e7
Fixes: 40549ba8f8e0 ("hugetlb: use new vma_lock for pmd sharing synchronization")
Tested-by: syzbot+f26d7c75c26ec19790e7(a)syzkaller.appspotmail.com
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/hugetlbfs/inode.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/fs/hugetlbfs/inode.c~hugetlbfs-skip-vmas-without-shareable-locks-in-hugetlb_vmdelete_list
+++ a/fs/hugetlbfs/inode.c
@@ -487,7 +487,8 @@ hugetlb_vmdelete_list(struct rb_root_cac
if (!hugetlb_vma_trylock_write(vma))
continue;
-
+ if (!__vma_shareable_lock(vma))
+ continue;
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
_
Patches currently in -mm which might be from kartikey406(a)gmail.com are
hugetlbfs-skip-vmas-without-shareable-locks-in-hugetlb_vmdelete_list.patch
When the PAGEMAP_SCAN ioctl is invoked with vec_len = 0 and reaches
pagemap_scan_backout_range(), the kernel panics with a null-ptr-deref:
[ 44.936808] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.937797] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
[ 44.938391] CPU: 1 UID: 0 PID: 2480 Comm: reproducer Not tainted 6.17.0-rc6 #22 PREEMPT(none)
[ 44.939062] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.939935] RIP: 0010:pagemap_scan_thp_entry.isra.0+0x741/0xa80
<snip registers, unreliable trace>
[ 44.946828] Call Trace:
[ 44.947030] <TASK>
[ 44.949219] pagemap_scan_pmd_entry+0xec/0xfa0
[ 44.952593] walk_pmd_range.isra.0+0x302/0x910
[ 44.954069] walk_pud_range.isra.0+0x419/0x790
[ 44.954427] walk_p4d_range+0x41e/0x620
[ 44.954743] walk_pgd_range+0x31e/0x630
[ 44.955057] __walk_page_range+0x160/0x670
[ 44.956883] walk_page_range_mm+0x408/0x980
[ 44.958677] walk_page_range+0x66/0x90
[ 44.958984] do_pagemap_scan+0x28d/0x9c0
[ 44.961833] do_pagemap_cmd+0x59/0x80
[ 44.962484] __x64_sys_ioctl+0x18d/0x210
[ 44.962804] do_syscall_64+0x5b/0x290
[ 44.963111] entry_SYSCALL_64_after_hwframe+0x76/0x7e
vec_len = 0 in pagemap_scan_init_bounce_buffer() means no buffers are
allocated and p->vec_buf remains set to NULL.
This breaks an assumption made later in pagemap_scan_backout_range():
that a page_region is always allocated at p->vec_buf_index.
Fix it by explicitly checking p->vec_buf for NULL before dereferencing.
Other sites that might run into the same deref issue are already (directly
or transitively) protected by checking p->vec_buf.
Note:
From the PAGEMAP_SCAN man page, it seems vec_len = 0 is valid when no output
is requested and the caller is interested only in the side effects; hence it
passes the check in pagemap_scan_get_args().
This issue was found by syzkaller.
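An illustrative (untested) userspace sketch of the trigger, assuming the
PAGEMAP_SCAN UAPI from include/uapi/linux/fs.h; whether a given kernel and
THP layout actually reach pagemap_scan_backout_range() varies, so the
syzkaller reproducer remains authoritative:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/fs.h>	/* struct pm_scan_arg, PAGEMAP_SCAN, PAGE_IS_* */

	int main(void)
	{
		size_t len = 2UL << 20;		/* THP-sized anonymous mapping */
		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		struct pm_scan_arg arg = {
			.size = sizeof(arg),
			.start = (unsigned long)buf,
			.end = (unsigned long)buf + len,
			.vec = 0,
			.vec_len = 0,	/* no output buffer: p->vec_buf stays NULL */
			.return_mask = PAGE_IS_WRITTEN,
		};
		int fd = open("/proc/self/pagemap", O_RDONLY);

		buf[0] = 1;		/* populate so the walk sees entries */
		return ioctl(fd, PAGEMAP_SCAN, &arg);
	}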
Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs")
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Penglei Jiang <superman.xpt(a)gmail.com>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: "Michał Mirosław" <mirq-linux(a)rere.qmqm.pl>
Cc: Stephen Rothwell <sfr(a)canb.auug.org.au>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-fsdevel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
fs/proc/task_mmu.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 29cca0e6d0ff..b26ae556b446 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2417,6 +2417,9 @@ static void pagemap_scan_backout_range(struct pagemap_scan_private *p,
{
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
+ if (!p->vec_buf)
+ return;
+
if (cur_buf->start != addr)
cur_buf->end = addr;
else
--
2.47.3
devm_kcalloc() may fail. ndtest_probe() allocates three DMA address
arrays (dcr_dma, label_dma, dimm_dma) and later unconditionally uses
them in ndtest_nvdimm_init(), which can lead to a NULL pointer
dereference under low-memory conditions.
Check all three allocations and return -ENOMEM if any allocation fails,
jumping to the common error path. Do not emit an extra error message
since the allocator already warns on allocation failure.
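As background (not part of the patch): devm_*() allocations are tied to the
device lifetime, so the error path needs no matching kfree(); a generic
sketch of the idiom, with hypothetical names:

	/* foo, dev and n are illustrative, not taken from ndtest.c. */
	foo->buf = devm_kcalloc(dev, n, sizeof(*foo->buf), GFP_KERNEL);
	if (!foo->buf)
		return -ENOMEM;	/* earlier devm allocations are auto-freed */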
Fixes: 9399ab61ad82 ("ndtest: Add dimms to the two buses")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
changelog:
v3:
- Add NULL checks for all three devm_kcalloc() calls and goto the common
error label on failure.
v2:
- Drop pr_err() on allocation failure; only NULL-check and return -ENOMEM.
- No other changes.
---
tools/testing/nvdimm/test/ndtest.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 68a064ce598c..8e3b6be53839 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -850,11 +850,22 @@ static int ndtest_probe(struct platform_device *pdev)
p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->label_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
-
+ if (!p->dimm_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
--
2.43.0
I have a couple more fixes I'm testing, but the issues have
been with us for a long time, and they come from
code review rather than from the field, IIUC, so no rush I think.
The following changes since commit 76eeb9b8de9880ca38696b2fb56ac45ac0a25c6c:
Linux 6.17-rc5 (2025-09-07 14:22:57 -0700)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to cde7e7c3f8745a61458cea61aa28f37c3f5ae2b4:
MAINTAINERS, mailmap: Update address for Peter Hilber (2025-09-21 17:44:20 -0400)
----------------------------------------------------------------
virtio,vhost: last minute fixes
More small fixes. Most notably this fixes crashes and hangs in
vhost-net.
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
----------------------------------------------------------------
Alok Tiwari (1):
vhost-scsi: fix argument order in tport allocation error message
Alyssa Ross (1):
virtio_config: clarify output parameters
Ashwini Sahu (1):
uapi: vduse: fix typo in comment
Jason Wang (2):
vhost-net: unbreak busy polling
vhost-net: flush batched before enabling notifications
Michael S. Tsirkin (1):
Revert "vhost/net: Defer TX queue re-enable until after sendmsg"
Peter Hilber (1):
MAINTAINERS, mailmap: Update address for Peter Hilber
Sebastian Andrzej Siewior (1):
vhost: Take a reference on the task in struct vhost_task.
.mailmap | 1 +
MAINTAINERS | 2 +-
drivers/vhost/net.c | 40 +++++++++++++++++-----------------------
drivers/vhost/scsi.c | 2 +-
include/linux/virtio_config.h | 11 ++++++-----
include/uapi/linux/vduse.h | 2 +-
kernel/vhost_task.c | 3 ++-
7 files changed, 29 insertions(+), 32 deletions(-)