A malicious user could pass an arbitrarily bad value
to memdup_user_nul(), potentially causing kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
Found via static analysis and code review.
Fixes: 3783225130f0 ("powerpc/pseries: use memdup_user_nul")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
arch/powerpc/platforms/pseries/reconfig.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 599bd2c78514..b6bc1d8b2207 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -366,6 +366,9 @@ static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t coun
if (rv)
return rv;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
kbuf = memdup_user_nul(buf, count);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
--
2.39.5 (Apple Git-154)
The code did not check the return value of usbnet_get_endpoints.
Add checks and return the error if it fails to transfer the error.
Found via static anlaysis and this is similar to
commit 07161b2416f7 ("sr9800: Add check for usbnet_get_endpoints").
Fixes: 933a27d39e0e ("USB: asix - Add AX88178 support and many other changes")
Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
- v1:http://lore.kernel.org/all/20250830103743.2118777-1-linmq006@gmail.com
changes in v2:
- fix the blank line.
- update message to clarify how this is detected
- add Cc: stable
---
drivers/net/usb/asix_devices.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 85bd5d845409..232bbd79a4de 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
int i;
unsigned long gpio_bits = dev->driver_info->data;
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ goto out;
/* Toggle the GPIOs in a manufacturer/model specific way */
for (i = 2; i >= 0; i--) {
@@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->driver_priv = priv;
- usbnet_get_endpoints(dev, intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Maybe the boot loader passed the MAC address via device tree */
if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) {
@@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
int ret;
u8 buf[ETH_ALEN] = {0};
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Get the MAC address */
ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
--
2.39.5 (Apple Git-154)
Here are various fixes from Paolo, addressing very occasional issues on
the sending side:
- Patch 1: drop an optimisation that could lead to timeout in case of
race conditions. A fix for up to v5.11.
- Patch 2: fix stream corruption under very specific conditions. A fix
for up to v5.13.
- Patch 3: restore MPTCP-level zero window probe after a recent fix. A
fix for up to v5.16.
- Patch 4: new MIB counter to track MPTCP-level zero windows probe to
help catching issues similar to the one fixed by the previous patch.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Paolo Abeni (4):
mptcp: drop bogus optimization in __mptcp_check_push()
mptcp: fix MSG_PEEK stream corruption
mptcp: restore window probe
mptcp: zero window probe mib
net/mptcp/mib.c | 1 +
net/mptcp/mib.h | 1 +
net/mptcp/protocol.c | 57 +++++++++++++++++++++++++++++++++-------------------
net/mptcp/protocol.h | 2 +-
4 files changed, 39 insertions(+), 22 deletions(-)
---
base-commit: 210b35d6a7ea415494ce75490c4b43b4e717d935
change-id: 20251027-net-mptcp-send-timeout-7fc1474fd849
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
The patch titled
Subject: kasan: unpoison vms[area] addresses with a common tag
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kasan-unpoison-vms-addresses-with-a-common-tag.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Subject: kasan: unpoison vms[area] addresses with a common tag
Date: Wed, 29 Oct 2025 19:06:03 +0000
The problem presented here is related to NUMA systems and tag-based KASAN
modes - software and hardware ones. It can be explained in the following
points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits the
tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the first
chunk.
5. Thus, the subsequent chunks need to have their tag set to match
that of the first chunk.
Unpoison all vms[]->addr memory and pointers with the same tag to resolve
the mismatch.
Link: https://lkml.kernel.org/r/932121edc75be8e2038d64ecb4853df2e2b258df.17617636…
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Bill Wendling <morbo(a)google.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: FUJITA Tomonori <fujita.tomonori(a)gmail.com>
Cc: Guilherme Giacomo Simoes <trintaeoitogc(a)gmail.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Jeremy Linton <jeremy.linton(a)arm.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Justin Stitt <justinstitt(a)google.com>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: levi.yun <yeoreum.yun(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mostafa Saleh <smostafa(a)google.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Pankaj Gupta <pankaj.gupta(a)amd.com>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Samuel Holland <samuel.holland(a)sifive.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Will Deacon <will(a)kernel.org>
Cc: Xin Li (Intel) <xin(a)zytor.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org> [6.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/tags.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/mm/kasan/tags.c~kasan-unpoison-vms-addresses-with-a-common-tag
+++ a/mm/kasan/tags.c
@@ -148,12 +148,20 @@ void __kasan_save_free_info(struct kmem_
save_stack_info(cache, object, 0, true);
}
+/*
+ * A tag mismatch happens when calculating per-cpu chunk addresses, because
+ * they all inherit the tag from vms[0]->addr, even when nr_vms is bigger
+ * than 1. This is a problem because all the vms[]->addr come from separate
+ * allocations and have different tags so while the calculated address is
+ * correct the tag isn't.
+ */
void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
{
int area;
for (area = 0 ; area < nr_vms ; area++) {
kasan_poison(vms[area]->addr, vms[area]->size,
- arch_kasan_get_tag(vms[area]->addr), false);
+ arch_kasan_get_tag(vms[0]->addr), false);
+ arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
}
}
_
Patches currently in -mm which might be from maciej.wieczor-retman(a)intel.com are
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
kasan-unpoison-vms-addresses-with-a-common-tag.patch
The patch titled
Subject: kasan: unpoison pcpu chunks with base address tag
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Subject: kasan: unpoison pcpu chunks with base address tag
Date: Wed, 29 Oct 2025 19:05:49 +0000
The problem presented here is related to NUMA systems and tag-based KASAN
modes - software and hardware ones. It can be explained in the following
points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits the
tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the first
chunk.
5. Thus, the subsequent chunks need to have their tag set to match
that of the first chunk.
Refactor code by moving it into a helper in preparation for the actual
fix.
Link: https://lkml.kernel.org/r/fbce40a59b0a22a5735cb6e9b95c5a45a34b23cb.17617636…
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Bill Wendling <morbo(a)google.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: FUJITA Tomonori <fujita.tomonori(a)gmail.com>
Cc: Guilherme Giacomo Simoes <trintaeoitogc(a)gmail.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Jeremy Linton <jeremy.linton(a)arm.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Justin Stitt <justinstitt(a)google.com>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: levi.yun <yeoreum.yun(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Mostafa Saleh <smostafa(a)google.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Pankaj Gupta <pankaj.gupta(a)amd.com>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Samuel Holland <samuel.holland(a)sifive.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Will Deacon <will(a)kernel.org>
Cc: Xin Li (Intel) <xin(a)zytor.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org> [6.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/kasan.h | 10 ++++++++++
mm/kasan/tags.c | 11 +++++++++++
mm/vmalloc.c | 4 +---
3 files changed, 22 insertions(+), 3 deletions(-)
--- a/include/linux/kasan.h~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/include/linux/kasan.h
@@ -614,6 +614,13 @@ static __always_inline void kasan_poison
__kasan_poison_vmalloc(start, size);
}
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
+static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ if (kasan_enabled())
+ __kasan_unpoison_vmap_areas(vms, nr_vms);
+}
+
#else /* CONFIG_KASAN_VMALLOC */
static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -638,6 +645,9 @@ static inline void *kasan_unpoison_vmall
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
{ }
+static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{ }
+
#endif /* CONFIG_KASAN_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
--- a/mm/kasan/tags.c~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/mm/kasan/tags.c
@@ -18,6 +18,7 @@
#include <linux/static_key.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include "kasan.h"
#include "../slab.h"
@@ -146,3 +147,13 @@ void __kasan_save_free_info(struct kmem_
{
save_stack_info(cache, object, 0, true);
}
+
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ int area;
+
+ for (area = 0 ; area < nr_vms ; area++) {
+ kasan_poison(vms[area]->addr, vms[area]->size,
+ arch_kasan_get_tag(vms[area]->addr), false);
+ }
+}
--- a/mm/vmalloc.c~kasan-unpoison-pcpu-chunks-with-base-address-tag
+++ a/mm/vmalloc.c
@@ -4870,9 +4870,7 @@ retry:
* With hardware tag-based KASAN, marking is skipped for
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
*/
- for (area = 0; area < nr_vms; area++)
- vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
- vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+ kasan_unpoison_vmap_areas(vms, nr_vms);
kfree(vas);
return vms;
_
Patches currently in -mm which might be from maciej.wieczor-retman(a)intel.com are
kasan-unpoison-pcpu-chunks-with-base-address-tag.patch
kasan-unpoison-vms-addresses-with-a-common-tag.patch
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
The problem presented here is related to NUMA systems and tag-based
KASAN modes - software and hardware ones. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits
the tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the
first chunk.
5. Thus, the subsequent chunks need to have their tag set to
match that of the first chunk.
Unpoison all vms[]->addr memory and pointers with the same tag to
resolve the mismatch.
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
---
Changelog v6:
- Add Baoquan's tested-by tag.
- Move patch to the beginning of the series as it is a fix.
- Add fixes tag.
mm/kasan/tags.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index ecc17c7c675a..c6b40cbffae3 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -148,12 +148,20 @@ void __kasan_save_free_info(struct kmem_cache *cache, void *object)
save_stack_info(cache, object, 0, true);
}
+/*
+ * A tag mismatch happens when calculating per-cpu chunk addresses, because
+ * they all inherit the tag from vms[0]->addr, even when nr_vms is bigger
+ * than 1. This is a problem because all the vms[]->addr come from separate
+ * allocations and have different tags so while the calculated address is
+ * correct the tag isn't.
+ */
void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
{
int area;
for (area = 0 ; area < nr_vms ; area++) {
kasan_poison(vms[area]->addr, vms[area]->size,
- arch_kasan_get_tag(vms[area]->addr), false);
+ arch_kasan_get_tag(vms[0]->addr), false);
+ arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
}
}
--
2.51.0
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
The problem presented here is related to NUMA systems and tag-based
KASAN modes - software and hardware ones. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits
the tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the
first chunk.
5. Thus, the subsequent chunks need to have their tag set to
match that of the first chunk.
Refactor code by moving it into a helper in preparation for the actual
fix.
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
---
Changelog v6:
- Add Baoquan's tested-by tag.
- Move patch to the beginning of the series as it is a fix.
- Move the refactored code to tags.c because both software and hardware
modes compile it.
- Add fixes tag.
Changelog v4:
- Redo the patch message numbered list.
- Do the refactoring in this patch and move additions to the next new
one.
Changelog v3:
- Remove last version of this patch that just resets the tag on
base_addr and add this patch that unpoisons all areas with the same
tag instead.
include/linux/kasan.h | 10 ++++++++++
mm/kasan/tags.c | 11 +++++++++++
mm/vmalloc.c | 4 +---
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d12e1a5f5a9a..b00849ea8ffd 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -614,6 +614,13 @@ static __always_inline void kasan_poison_vmalloc(const void *start,
__kasan_poison_vmalloc(start, size);
}
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms);
+static __always_inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ if (kasan_enabled())
+ __kasan_unpoison_vmap_areas(vms, nr_vms);
+}
+
#else /* CONFIG_KASAN_VMALLOC */
static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -638,6 +645,9 @@ static inline void *kasan_unpoison_vmalloc(const void *start,
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
{ }
+static inline void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{ }
+
#endif /* CONFIG_KASAN_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index b9f31293622b..ecc17c7c675a 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -18,6 +18,7 @@
#include <linux/static_key.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include "kasan.h"
#include "../slab.h"
@@ -146,3 +147,13 @@ void __kasan_save_free_info(struct kmem_cache *cache, void *object)
{
save_stack_info(cache, object, 0, true);
}
+
+void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
+{
+ int area;
+
+ for (area = 0 ; area < nr_vms ; area++) {
+ kasan_poison(vms[area]->addr, vms[area]->size,
+ arch_kasan_get_tag(vms[area]->addr), false);
+ }
+}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 798b2ed21e46..934c8bfbcebf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4870,9 +4870,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
* With hardware tag-based KASAN, marking is skipped for
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
*/
- for (area = 0; area < nr_vms; area++)
- vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
- vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+ kasan_unpoison_vmap_areas(vms, nr_vms);
kfree(vas);
return vms;
--
2.51.0
Hello.
We have observed a huge latency increase using `fork()` after ingesting the CVE-2025-38085 fix which leads to the commit `1013af4f585f: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race`. On large machines with 1.5TB of memory with 196 cores, we identified mmapping of 1.2TB of shared memory and forking itself dozens or hundreds of times we see a increase of execution times of a factor of 4. The reproducer is at the end of the email.
Comparing the a kernel without this patch with a kernel with this patch applied when spawning 1000 children we see those execution times:
Patched kernel:
$ time make stress
...
real 0m11.275s
user 0m0.177s
sys 0m23.905s
Original kernel :
$ time make stress
...real 0m2.475s
user 0m1.398s
sys 0m2.501s
The patch in question: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
My observation/assumption is:
each child touches 100 random pages and despawns
on each despawn `huge_pmd_unshare()` is called
each call to `huge_pmd_unshare()` syncrhonizes all threads using `tlb_remove_table_sync_one()` leading to the regression
I'm happy to provide more information.
Thank you
Stanislav Uschakow
=== Reproducer ===
Setup:
#!/bin/bash
echo "Setting up hugepages for reproduction..."
# hugepages (1.2TB / 2MB = 614400 pages)
REQUIRED_PAGES=614400
# Check current hugepage allocation
CURRENT_PAGES=$(cat /proc/sys/vm/nr_hugepages)
echo "Current hugepages: $CURRENT_PAGES"
if [ "$CURRENT_PAGES" -lt "$REQUIRED_PAGES" ]; then
echo "Allocating $REQUIRED_PAGES hugepages..."
echo $REQUIRED_PAGES | sudo tee /proc/sys/vm/nr_hugepages
ALLOCATED=$(cat /proc/sys/vm/nr_hugepages)
echo "Allocated hugepages: $ALLOCATED"
if [ "$ALLOCATED" -lt "$REQUIRED_PAGES" ]; then
echo "Warning: Could not allocate all required hugepages"
echo "Available: $ALLOCATED, Required: $REQUIRED_PAGES"
fi
fi
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo -e "\nHugepage information:"
cat /proc/meminfo | grep -i huge
echo -e "\nSetup complete. You can now run the reproduction test."
Makefile:
CXX = gcc
CXXFLAGS = -O2 -Wall
TARGET = hugepage_repro
SOURCE = hugepage_repro.c
$(TARGET): $(SOURCE)
$(CXX) $(CXXFLAGS) -o $(TARGET) $(SOURCE)
clean:
rm -f $(TARGET)
setup:
chmod +x setup_hugepages.sh
./setup_hugepages.sh
test: $(TARGET)
./$(TARGET) 20 3
stress: $(TARGET)
./$(TARGET) 1000 1
.PHONY: clean setup test stress
hugepage_repro.c:
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
#define HUGEPAGE_SIZE (2 * 1024 * 1024) // 2MB
#define TOTAL_SIZE (1200ULL * 1024 * 1024 * 1024) // 1.2TB
#define NUM_HUGEPAGES (TOTAL_SIZE / HUGEPAGE_SIZE)
void* create_hugepage_mapping() {
void* addr = mmap(NULL, TOTAL_SIZE, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
if (addr == MAP_FAILED) {
perror("mmap hugepages failed");
exit(1);
}
return addr;
}
void touch_random_pages(void* addr, int num_touches) {
char* base = (char*)addr;
for (int i = 0; i < num_touches; ++i) {
size_t offset = (rand() % NUM_HUGEPAGES) * HUGEPAGE_SIZE;
volatile char val = base[offset];
(void)val;
}
}
void child_process(void* shared_mem, int child_id) {
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
touch_random_pages(shared_mem, 100);
clock_gettime(CLOCK_MONOTONIC, &end);
long duration = (end.tv_sec - start.tv_sec) * 1000000 +
(end.tv_nsec - start.tv_nsec) / 1000;
printf("Child %d completed in %ld μs\n", child_id, duration);
}
int main(int argc, char* argv[]) {
int num_processes = argc > 1 ? atoi(argv[1]) : 50;
int iterations = argc > 2 ? atoi(argv[2]) : 5;
printf("Creating %lluGB hugepage mapping...\n", TOTAL_SIZE / (1024*1024*1024));
void* shared_mem = create_hugepage_mapping();
for (int iter = 0; iter < iterations; ++iter) {
printf("\nIteration %d: Forking %d processes\n", iter + 1, num_processes);
pid_t children[num_processes];
struct timespec iter_start, iter_end;
clock_gettime(CLOCK_MONOTONIC, &iter_start);
for (int i = 0; i < num_processes; ++i) {
pid_t pid = fork();
if (pid == 0) {
child_process(shared_mem, i);
exit(0);
} else if (pid > 0) {
children[i] = pid;
}
}
for (int i = 0; i < num_processes; ++i) {
waitpid(children[i], NULL, 0);
}
clock_gettime(CLOCK_MONOTONIC, &iter_end);
long iter_duration = (iter_end.tv_sec - iter_start.tv_sec) * 1000 +
(iter_end.tv_nsec - iter_start.tv_nsec) / 1000000;
printf("Iteration completed in %ld ms\n", iter_duration);
}
munmap(shared_mem, TOTAL_SIZE);
return 0;
}
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597