The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x 08f959759e1e6e9c4b898c51a7d387ac3480630b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052516-factsheet-coagulant-3fb0@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08f959759e1e6e9c4b898c51a7d387ac3480630b Mon Sep 17 00:00:00 2001
From: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Date: Wed, 23 Apr 2025 09:53:32 +0200
Subject: [PATCH] mmc: sdhci-of-dwcmshc: add PD workaround on RK3576
RK3576's power domains have a peculiar design where the PD_NVM power
domain, of which the sdhci controller is a part, seemingly does not have
idempotent runtime disable/enable. The end effect is that if PD_NVM gets
turned off by the generic power domain logic because all the devices
depending on it are suspended, then the next time the sdhci device is
unsuspended, it'll hang the SoC as soon as it tries accessing the CQHCI
registers.
RK3576's UFS support needed a new dev_pm_genpd_rpm_always_on function
added to the generic power domains API to handle what appears to be a
similar hardware design.
Use this new function to ask for the same treatment in the sdhci
controller by giving rk3576 its own platform data with its own postinit
function. The benefit of doing this instead of marking the power domains
always on in the power domain core is that we only do this if we know
the platform we're running on actually uses the sdhci controller. For
others, keeping PD_NVM always on would be a waste, as they won't run
into this specific issue. The only other IP in PD_NVM that could be
affected is FSPI0. If it gets a mainline driver, it will probably want
to do the same thing.
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Reviewed-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Fixes: cfee1b507758 ("pmdomain: rockchip: Add support for RK3576 SoC")
Cc: <stable(a)vger.kernel.org> # v6.15+
Link: https://lore.kernel.org/r/20250423-rk3576-emmc-fix-v3-1-0bf80e29967f@collab…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 09b9ab15e499..a20d03fdd6a9 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv
}
}
+static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+{
+ struct device *dev = mmc_dev(host->mmc);
+ int ret;
+
+ /*
+ * This works around the design of the RK3576's power domains, which
+ * makes the PD_NVM power domain, which the sdhci controller on the
+ * RK3576 is in, never come back the same way once it's run-time
+ * suspended once. This can happen during early kernel boot if no driver
+ * is using either PD_NVM or its child power domain PD_SDGMAC for a
+ * short moment, leading to it being turned off to save power. By
+ * keeping it on, sdhci suspending won't lead to PD_NVM becoming a
+ * candidate for getting turned off.
+ */
+ ret = dev_pm_genpd_rpm_always_on(dev, true);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n",
+ ERR_PTR(ret));
+
+ dwcmshc_rk35xx_postinit(host, dwc_priv);
+}
+
static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
.postinit = dwcmshc_rk35xx_postinit,
};
+static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = {
+ .pdata = {
+ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ },
+ .init = dwcmshc_rk35xx_init,
+ .postinit = dwcmshc_rk3576_postinit,
+};
+
static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = {
.pdata = {
.ops = &sdhci_dwcmshc_th1520_ops,
@@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
.compatible = "rockchip,rk3588-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
+ {
+ .compatible = "rockchip,rk3576-dwcmshc",
+ .data = &sdhci_dwcmshc_rk3576_pdata,
+ },
{
.compatible = "rockchip,rk3568-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 08f959759e1e6e9c4b898c51a7d387ac3480630b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052515-headdress-overcome-c741@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08f959759e1e6e9c4b898c51a7d387ac3480630b Mon Sep 17 00:00:00 2001
From: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Date: Wed, 23 Apr 2025 09:53:32 +0200
Subject: [PATCH] mmc: sdhci-of-dwcmshc: add PD workaround on RK3576
RK3576's power domains have a peculiar design where the PD_NVM power
domain, of which the sdhci controller is a part, seemingly does not have
idempotent runtime disable/enable. The end effect is that if PD_NVM gets
turned off by the generic power domain logic because all the devices
depending on it are suspended, then the next time the sdhci device is
unsuspended, it'll hang the SoC as soon as it tries accessing the CQHCI
registers.
RK3576's UFS support needed a new dev_pm_genpd_rpm_always_on function
added to the generic power domains API to handle what appears to be a
similar hardware design.
Use this new function to ask for the same treatment in the sdhci
controller by giving rk3576 its own platform data with its own postinit
function. The benefit of doing this instead of marking the power domains
always on in the power domain core is that we only do this if we know
the platform we're running on actually uses the sdhci controller. For
others, keeping PD_NVM always on would be a waste, as they won't run
into this specific issue. The only other IP in PD_NVM that could be
affected is FSPI0. If it gets a mainline driver, it will probably want
to do the same thing.
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Reviewed-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Fixes: cfee1b507758 ("pmdomain: rockchip: Add support for RK3576 SoC")
Cc: <stable(a)vger.kernel.org> # v6.15+
Link: https://lore.kernel.org/r/20250423-rk3576-emmc-fix-v3-1-0bf80e29967f@collab…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 09b9ab15e499..a20d03fdd6a9 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv
}
}
+static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+{
+ struct device *dev = mmc_dev(host->mmc);
+ int ret;
+
+ /*
+ * This works around the design of the RK3576's power domains, which
+ * makes the PD_NVM power domain, which the sdhci controller on the
+ * RK3576 is in, never come back the same way once it's run-time
+ * suspended once. This can happen during early kernel boot if no driver
+ * is using either PD_NVM or its child power domain PD_SDGMAC for a
+ * short moment, leading to it being turned off to save power. By
+ * keeping it on, sdhci suspending won't lead to PD_NVM becoming a
+ * candidate for getting turned off.
+ */
+ ret = dev_pm_genpd_rpm_always_on(dev, true);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n",
+ ERR_PTR(ret));
+
+ dwcmshc_rk35xx_postinit(host, dwc_priv);
+}
+
static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
.postinit = dwcmshc_rk35xx_postinit,
};
+static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = {
+ .pdata = {
+ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ },
+ .init = dwcmshc_rk35xx_init,
+ .postinit = dwcmshc_rk3576_postinit,
+};
+
static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = {
.pdata = {
.ops = &sdhci_dwcmshc_th1520_ops,
@@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
.compatible = "rockchip,rk3588-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
+ {
+ .compatible = "rockchip,rk3576-dwcmshc",
+ .data = &sdhci_dwcmshc_rk3576_pdata,
+ },
{
.compatible = "rockchip,rk3568-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
The quilt patch titled
Subject: mm: fix copy_vma() error handling for hugetlb mappings
has been removed from the -mm tree. Its filename was
mm-fix-copy_vma-error-handling-for-hugetlb-mappings.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Subject: mm: fix copy_vma() error handling for hugetlb mappings
Date: Fri, 23 May 2025 14:19:10 +0200
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 16 +++++++++++++++-
mm/mremap.c | 3 +--
mm/vma.c | 1 +
4 files changed, 22 insertions(+), 3 deletions(-)
--- a/include/linux/hugetlb.h~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(s
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
--- a/mm/hugetlb.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_a
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
--- a/mm/mremap.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
--- a/mm/vma.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct v
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
_
Patches currently in -mm which might be from rcn(a)igalia.com are
The quilt patch titled
Subject: memcg: always call cond_resched() after fn()
has been removed from the -mm tree. Its filename was
memcg-always-call-cond_resched-after-fn.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Breno Leitao <leitao(a)debian.org>
Subject: memcg: always call cond_resched() after fn()
Date: Fri, 23 May 2025 10:21:06 -0700
I am seeing soft lockup on certain machine types when a cgroup OOMs. This
is happening because killing the process in certain machine might be very
slow, which causes the soft lockup and RCU stalls. This happens usually
when the cgroup has MANY processes and memory.oom.group is set.
Example I am seeing in real production:
[462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) ....
....
[462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) ....
[462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982]
....
Quick look at why this is so slow, it seems to be related to serial flush
for certain machine types. For all the crashes I saw, the target CPU was
at console_flush_all().
In the case above, there are thousands of processes in the cgroup, and it
is soft locking up before it reaches the 1024 limit in the code (which
would call the cond_resched()). So, cond_resched() in 1024 blocks is not
sufficient.
Remove the counter-based conditional rescheduling logic and call
cond_resched() unconditionally after each task iteration, after fn() is
called. This avoids the lockup independently of how slow fn() is.
Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org
Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process")
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Suggested-by: Rik van Riel <riel(a)surriel.com>
Acked-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Michael van der Westhuizen <rmikey(a)meta.com>
Cc: Usama Arif <usamaarif642(a)gmail.com>
Cc: Pavel Begunkov <asml.silence(a)gmail.com>
Cc: Chen Ridong <chenridong(a)huawei.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/mm/memcontrol.c~memcg-always-call-cond_resched-after-fn
+++ a/mm/memcontrol.c
@@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cg
{
struct mem_cgroup *iter;
int ret = 0;
- int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cg
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
while (!ret && (task = css_task_iter_next(&it))) {
- /* Avoid potential softlockup warning */
- if ((++i & 1023) == 0)
- cond_resched();
ret = fn(task, arg);
+ /* Avoid potential softlockup warning */
+ cond_resched();
}
css_task_iter_end(&it);
if (ret) {
_
Patches currently in -mm which might be from leitao(a)debian.org are
The quilt patch titled
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
has been removed from the -mm tree. Its filename was
mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
Date: Thu, 22 May 2025 11:22:17 +0800
A kernel crash was observed when replacing free hugetlb folios:
BUG: kernel NULL pointer dereference, address: 0000000000000028
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary)
RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0
RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000
RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000
RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000
R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000
R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004
FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0
Call Trace:
<TASK>
replace_free_hugepage_folios+0xb6/0x100
alloc_contig_range_noprof+0x18a/0x590
? srso_return_thunk+0x5/0x5f
? down_read+0x12/0xa0
? srso_return_thunk+0x5/0x5f
cma_range_alloc.constprop.0+0x131/0x290
__cma_alloc+0xcf/0x2c0
cma_alloc_write+0x43/0xb0
simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110
debugfs_attr_write+0x46/0x70
full_proxy_write+0x62/0xa0
vfs_write+0xf8/0x420
? srso_return_thunk+0x5/0x5f
? filp_flush+0x86/0xa0
? srso_return_thunk+0x5/0x5f
? filp_close+0x1f/0x30
? srso_return_thunk+0x5/0x5f
? do_dup2+0xaf/0x160
? srso_return_thunk+0x5/0x5f
ksys_write+0x65/0xe0
do_syscall_64+0x64/0x170
entry_SYSCALL_64_after_hwframe+0x76/0x7e
There is a potential race between __update_and_free_hugetlb_folio() and
replace_free_hugepage_folios():
CPU1 CPU2
__update_and_free_hugetlb_folio replace_free_hugepage_folios
folio_test_hugetlb(folio)
-- It's still hugetlb folio.
__folio_clear_hugetlb(folio)
hugetlb_free_folio(folio)
h = folio_hstate(folio)
-- Here, h is NULL pointer
When the above race condition occurs, folio_hstate(folio) returns NULL,
and subsequent access to this NULL pointer will cause the system to crash.
To resolve this issue, execute folio_hstate(folio) under the protection
of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not
return NULL.
Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.…
Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/mm/hugetlb.c~mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios
+++ a/mm/hugetlb.c
@@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigne
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
+
+ /*
+ * The folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
+ spin_lock_irq(&hugetlb_lock);
if (folio_test_hugetlb(folio)) {
h = folio_hstate(folio);
} else {
+ spin_unlock_irq(&hugetlb_lock);
start_pfn++;
continue;
}
+ spin_unlock_irq(&hugetlb_lock);
if (!folio_ref_count(folio)) {
ret = alloc_and_dissolve_hugetlb_folio(h, folio,
_
Patches currently in -mm which might be from yangge1116(a)126.com are
The quilt patch titled
Subject: mm: vmalloc: only zero-init on vrealloc shrink
has been removed from the -mm tree. Its filename was
mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <kees(a)kernel.org>
Subject: mm: vmalloc: only zero-init on vrealloc shrink
Date: Thu, 15 May 2025 14:42:16 -0700
The common case is to grow reallocations, and since init_on_alloc will
have already zeroed the whole allocation, we only need to zero when
shrinking the allocation.
Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org
Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing")
Signed-off-by: Kees Cook <kees(a)kernel.org>
Tested-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: Eduard Zingerman <eddyz87(a)gmail.com>
Cc: "Erhard F." <erhard_f(a)mailbox.org>
Cc: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
--- a/mm/vmalloc.c~mm-vmalloc-only-zero-init-on-vrealloc-shrink
+++ a/mm/vmalloc.c
@@ -4093,8 +4093,8 @@ void *vrealloc_noprof(const void *p, siz
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
- /* Zero out "freed" memory. */
- if (want_init_on_free())
+ /* Zero out "freed" memory, potentially for future realloc. */
+ if (want_init_on_free() || want_init_on_alloc(flags))
memset((void *)p + size, 0, old_size - size);
vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
@@ -4107,9 +4107,11 @@ void *vrealloc_noprof(const void *p, siz
if (size <= alloced_size) {
kasan_unpoison_vmalloc(p + old_size, size - old_size,
KASAN_VMALLOC_PROT_NORMAL);
- /* Zero out "alloced" memory. */
- if (want_init_on_alloc(flags))
- memset((void *)p + old_size, 0, size - old_size);
+ /*
+ * No need to zero memory here, as unused memory will have
+ * already been zeroed at initial allocation time or during
+ * realloc shrink time.
+ */
vm->requested_size = size;
return (void *)p;
}
_
Patches currently in -mm which might be from kees(a)kernel.org are