The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 009c3a4bc41e855fd76f92727f9fbae4e5917d7f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025070621-plant-sedan-c997@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 009c3a4bc41e855fd76f92727f9fbae4e5917d7f Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman(a)sandisk.com>
Date: Mon, 26 May 2025 14:44:45 +0300
Subject: [PATCH] mmc: core: sd: Apply BROKEN_SD_DISCARD quirk earlier
Move the BROKEN_SD_DISCARD quirk for certain SanDisk SD cards from the
`mmc_blk_fixups[]` to `mmc_sd_fixups[]`. This ensures the quirk is
applied earlier in the device initialization process, aligning with the
reasoning in [1]. Applying the quirk sooner prevents the kernel from
incorrectly enabling discard support on affected cards during initial
setup.
[1] https://lore.kernel.org/all/20240820230631.GA436523@sony.com
Fixes: 07d2872bf4c8 ("mmc: core: Add SD card quirk for broken discard")
Signed-off-by: Avri Altman <avri.altman(a)sandisk.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250526114445.675548-1-avri.altman@sandisk.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 7f893bafaa60..c417ed34c057 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = {
0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd,
MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY),
+ /*
+ * Some SD cards reports discard support while they don't
+ */
+ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
+ MMC_QUIRK_BROKEN_SD_DISCARD),
+
END_FIXUP
};
@@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
MMC_QUIRK_TRIM_BROKEN),
- /*
- * Some SD cards reports discard support while they don't
- */
- MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
- MMC_QUIRK_BROKEN_SD_DISCARD),
-
END_FIXUP
};
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 009c3a4bc41e855fd76f92727f9fbae4e5917d7f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025070620-seclusion-jarring-cacd@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 009c3a4bc41e855fd76f92727f9fbae4e5917d7f Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman(a)sandisk.com>
Date: Mon, 26 May 2025 14:44:45 +0300
Subject: [PATCH] mmc: core: sd: Apply BROKEN_SD_DISCARD quirk earlier
Move the BROKEN_SD_DISCARD quirk for certain SanDisk SD cards from the
`mmc_blk_fixups[]` to `mmc_sd_fixups[]`. This ensures the quirk is
applied earlier in the device initialization process, aligning with the
reasoning in [1]. Applying the quirk sooner prevents the kernel from
incorrectly enabling discard support on affected cards during initial
setup.
[1] https://lore.kernel.org/all/20240820230631.GA436523@sony.com
Fixes: 07d2872bf4c8 ("mmc: core: Add SD card quirk for broken discard")
Signed-off-by: Avri Altman <avri.altman(a)sandisk.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250526114445.675548-1-avri.altman@sandisk.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 7f893bafaa60..c417ed34c057 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = {
0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd,
MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY),
+ /*
+ * Some SD cards reports discard support while they don't
+ */
+ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
+ MMC_QUIRK_BROKEN_SD_DISCARD),
+
END_FIXUP
};
@@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
MMC_QUIRK_TRIM_BROKEN),
- /*
- * Some SD cards reports discard support while they don't
- */
- MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
- MMC_QUIRK_BROKEN_SD_DISCARD),
-
END_FIXUP
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 45537926dd2aaa9190ac0fac5a0fbeefcadfea95
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025070623-pacific-brewery-7cd2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45537926dd2aaa9190ac0fac5a0fbeefcadfea95 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle(a)linux.ibm.com>
Date: Wed, 25 Jun 2025 11:28:28 +0200
Subject: [PATCH] s390/pci: Fix stale function handles in error handling
The error event information for PCI error events contains a function
handle for the respective function. This handle is generally captured at
the time the error event was recorded. Due to delays in processing or
cascading issues, it may happen that during firmware recovery multiple
events are generated. When processing these events in order, Linux may
already have recovered an affected function, making the event information
stale. Fix this by doing an unconditional CLP List PCI function call,
retrieving the current function handle with the zdev->state_lock held,
and ignoring the event if its function handle is stale.
Cc: stable(a)vger.kernel.org
Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
Reviewed-by: Julian Ruess <julianr(a)linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer(a)linux.ibm.com>
Reviewed-by: Farhan Ali <alifm(a)linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle(a)linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..82ee2578279a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -273,6 +273,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +283,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 45537926dd2aaa9190ac0fac5a0fbeefcadfea95
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025070622-clay-crimp-7cf6@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45537926dd2aaa9190ac0fac5a0fbeefcadfea95 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle(a)linux.ibm.com>
Date: Wed, 25 Jun 2025 11:28:28 +0200
Subject: [PATCH] s390/pci: Fix stale function handles in error handling
The error event information for PCI error events contains a function
handle for the respective function. This handle is generally captured at
the time the error event was recorded. Due to delays in processing or
cascading issues, it may happen that during firmware recovery multiple
events are generated. When processing these events in order, Linux may
already have recovered an affected function, making the event information
stale. Fix this by doing an unconditional CLP List PCI function call,
retrieving the current function handle with the zdev->state_lock held,
and ignoring the event if its function handle is stale.
Cc: stable(a)vger.kernel.org
Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
Reviewed-by: Julian Ruess <julianr(a)linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer(a)linux.ibm.com>
Reviewed-by: Farhan Ali <alifm(a)linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle(a)linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..82ee2578279a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -273,6 +273,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +283,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
This series has a bug fix for the early bootconsole as well as some
related efficiency improvements and cleanup.
The relevant code is subject to CONSOLE_DEBUG, which is presently only
used with CONFIG_MAC. To test this series (in qemu-system-m68k, for example)
it's helpful to enable CONFIG_EARLY_PRINTK and
CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER and boot with
kernel parameters 'console=ttyS0 earlyprintk keep_bootcon'.
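For example, a minimal qemu invocation along those lines (a sketch, assuming
the q800 machine model and a vmlinux built with the options above) is:
qemu-system-m68k -machine q800 -kernel vmlinux -nographic \
	-append "console=ttyS0 earlyprintk keep_bootcon"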
---
Changed since v1:
- Solved problem with line wrap while scrolling.
- Added two additional patches.
Changed since v2:
- Adopted addq and subq as suggested by Andreas.
Finn Thain (3):
m68k: Fix lost column on framebuffer debug console
m68k: Avoid pointless recursion in debug console rendering
m68k: Remove unused "cursor home" code from debug console
arch/m68k/kernel/head.S | 73 +++++++++++++++++++++--------------------
1 file changed, 37 insertions(+), 36 deletions(-)
--
2.45.3
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x fba46a5d83ca8decb338722fb4899026d8d9ead2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025063033-shrink-submersed-b5de@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fba46a5d83ca8decb338722fb4899026d8d9ead2 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Date: Mon, 16 Jun 2025 14:45:20 -0400
Subject: [PATCH] maple_tree: fix MA_STATE_PREALLOC flag in mas_preallocate()
Temporarily clear the preallocation flag when explicitly requesting
allocations. Pre-existing allocations are already counted against the
request through mas_node_count_gfp(), but the allocations will not happen
if the MA_STATE_PREALLOC flag is set. This flag is meant to avoid
re-allocating in bulk allocation mode, and to detect issues with
preallocation calculations.
The MA_STATE_PREALLOC flag should also always be set on zero allocations
so that detection of underflow allocations will print a WARN_ON() during
consumption.
The user visible effect of this flaw is a WARN_ON() followed by a null
pointer dereference when a subsequent request for a larger number of nodes
is ignored, such as the vma merge retry in mmap_region() caused by drivers
altering the vma flags (which happens in v6.6, at least).
Link: https://lkml.kernel.org/r/20250616184521.3382795-3-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Reported-by: Hailong Liu <hailong.liu(a)oppo.com>
Link: https://lore.kernel.org/all/1652f7eb-a51b-4fee-8058-c73af63bacd1@oppo.com/
Link: https://lore.kernel.org/all/20250428184058.1416274-1-Liam.Howlett@oracle.co…
Link: https://lore.kernel.org/all/20250429014754.1479118-1-Liam.Howlett@oracle.co…
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Hailong Liu <hailong.liu(a)oppo.com>
Cc: zhangpeng.00(a)bytedance.com <zhangpeng.00(a)bytedance.com>
Cc: Steve Kang <Steve.Kang(a)unisoc.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Sidhartha Kumar <sidhartha.kumar(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index affe979bd14d..00524e55a21e 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5527,8 +5527,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
mas->store_type = mas_wr_store_type(&wr_mas);
request = mas_prealloc_calc(&wr_mas, entry);
if (!request)
- return ret;
+ goto set_flag;
+ mas->mas_flags &= ~MA_STATE_PREALLOC;
mas_node_count_gfp(mas, request, gfp);
if (mas_is_err(mas)) {
mas_set_alloc_req(mas, 0);
@@ -5538,6 +5539,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
return ret;
}
+set_flag:
mas->mas_flags |= MA_STATE_PREALLOC;
return ret;
}
The value for some hwprobe keys, like MISALIGNED_VECTOR_PERF, is
determined by an asynchronous kthread. This kthread can finish after
the hwprobe vDSO data is populated, creating a race condition where
userspace can read stale values.
A completion-based framework is introduced to synchronize the async
probes with the vDSO population. The init_hwprobe_vdso_data()
function is deferred to `late_initcall` and now blocks until all
probes signal completion.
Reported-by: Tsukasa OI <research_trasio(a)irq.a4lg.com>
Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@ir…
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexandre Ghiti <alexghiti(a)rivosinc.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Jingwei Wang <wangjingwei(a)iscas.ac.cn>
---
Changes in v4:
- Reworked the synchronization mechanism based on feedback from Palmer
and Alexandre.
- Instead of a post-hoc refresh, this version introduces a robust
completion-based framework using an atomic counter to ensure async
probes are finished before populating the vDSO.
- Moved the vdso data initialization to a late_initcall to avoid
impacting boot time.
Changes in v3:
- Retained existing blank line.
Changes in v2:
- Addressed Yixun's feedback regarding #ifdef CONFIG_MMU usage.
- Updated commit message to provide a high-level summary.
- Added Fixes tag for commit e7c9d66e313b.
v1: https://lore.kernel.org/linux-riscv/20250521052754.185231-1-wangjingwei@isc…
arch/riscv/include/asm/hwprobe.h | 8 +++++++-
arch/riscv/kernel/sys_hwprobe.c | 20 +++++++++++++++++++-
arch/riscv/kernel/unaligned_access_speed.c | 9 +++++++--
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 7fe0a379474ae2c6..87af186d92e75ddb 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -40,5 +40,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
return pair->value == other_pair->value;
}
-
+#ifdef CONFIG_MMU
+void riscv_hwprobe_register_async_probe(void);
+void riscv_hwprobe_complete_async_probe(void);
+#else
+inline void riscv_hwprobe_register_async_probe(void) {}
+inline void riscv_hwprobe_complete_async_probe(void) {}
+#endif
#endif
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 0b170e18a2beba57..8c50dcec2b754c30 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -5,6 +5,8 @@
* more details.
*/
#include <linux/syscalls.h>
+#include <linux/completion.h>
+#include <linux/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
@@ -467,6 +469,20 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
#ifdef CONFIG_MMU
+static DECLARE_COMPLETION(boot_probes_done);
+static atomic_t pending_boot_probes = ATOMIC_INIT(0);
+
+void riscv_hwprobe_register_async_probe(void)
+{
+ atomic_inc(&pending_boot_probes);
+}
+
+void riscv_hwprobe_complete_async_probe(void)
+{
+ if (atomic_dec_and_test(&pending_boot_probes))
+ complete(&boot_probes_done);
+}
+
static int __init init_hwprobe_vdso_data(void)
{
struct vdso_arch_data *avd = vdso_k_arch_data;
@@ -474,6 +490,8 @@ static int __init init_hwprobe_vdso_data(void)
struct riscv_hwprobe pair;
int key;
+ if (unlikely(atomic_read(&pending_boot_probes) > 0))
+ wait_for_completion(&boot_probes_done);
/*
* Initialize vDSO data with the answers for the "all CPUs" case, to
* save a syscall in the common case.
@@ -504,7 +522,7 @@ static int __init init_hwprobe_vdso_data(void)
return 0;
}
-arch_initcall_sync(init_hwprobe_vdso_data);
+late_initcall(init_hwprobe_vdso_data);
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index ae2068425fbcd207..4b8ad2673b0f7470 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
schedule_on_each_cpu(check_vector_unaligned_access);
+ riscv_hwprobe_complete_async_probe();
return 0;
}
@@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void)
per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
- kthread_run(vec_check_unaligned_access_speed_all_cpus,
- NULL, "vec_check_unaligned_access_speed_all_cpus");
+ riscv_hwprobe_register_async_probe();
+ if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
+ NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
+ pr_warn("Failed to create vec_unalign_check kthread\n");
+ riscv_hwprobe_complete_async_probe();
+ }
}
/*
--
2.50.0
This patch series introduces a few minor fixes for Exynos7870 devices.
These fix USB gadget problems and serious crashes on certain device
variants. More information is provided in the respective commits.
This series has no dependencies. Would be nice to get them merged in
6.16 itself. I assume it's okay to cc stable as the -rc releases are
also owned by the "Stable Group" in git.kernel.org... [1] [2]
[1] https://git.kernel.org/?q=Stable+Group
[2] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Signed-off-by: Kaustabh Chakraborty <kauschluss(a)disroot.org>
---
Kaustabh Chakraborty (3):
arm64: dts: exynos7870: add quirk to disable USB2 LPM in gadget mode
arm64: dts: exynos7870-on7xelte: reduce memory ranges to base amount
arm64: dts: exynos7870-j6lte: reduce memory ranges to base amount
arch/arm64/boot/dts/exynos/exynos7870-j6lte.dts | 2 +-
arch/arm64/boot/dts/exynos/exynos7870-on7xelte.dts | 2 +-
arch/arm64/boot/dts/exynos/exynos7870.dtsi | 1 +
3 files changed, 3 insertions(+), 2 deletions(-)
---
base-commit: 1b152eeca84a02bdb648f16b82ef3394007a9dcf
change-id: 20250626-exynos7870-dts-fixes-e730f7086ddc
Best regards,
--
Kaustabh Chakraborty <kauschluss(a)disroot.org>
From: Ge Yang <yangge1116(a)126.com>
Commit d228814b1913 ("efi/libstub: Add get_event_log() support for CC
platforms") reuses the TPM2 support code for CC platforms. As a result,
when launching a TDX virtual machine with CoCo measurement enabled, the
following error log is generated:
[Firmware Bug]: Failed to parse event in TPM Final Events Log
Call Trace:
efi_config_parse_tables()
efi_tpm_eventlog_init()
tpm2_calc_event_log_size()
__calc_tpm2_event_size()
The pcr_idx value in the Intel TDX log header is 1, causing the
function __calc_tpm2_event_size() to fail to recognize the log header,
ultimately leading to the "Failed to parse event in TPM Final Events
Log" error.
According to UEFI Spec 2.10 Section 38.4.1, for TDX, TPM PCR 0 maps to
MRTD, so the log header uses TPM PCR 1. To successfully parse the TDX
event log header, the check for a pcr_idx value of 0 has been removed
here; this does not appear to affect other functionality.
Link: https://uefi.org/specs/UEFI/2.10/38_Confidential_Computing.html#intel-trust…
Fixes: d228814b1913 ("efi/libstub: Add get_event_log() support for CC platforms")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Cc: stable(a)vger.kernel.org
---
include/linux/tpm_eventlog.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 891368e..05c0ae5 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -202,8 +202,7 @@ static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev
event_type = event->event_type;
/* Verify that it's the log header */
- if (event_header->pcr_idx != 0 ||
- event_header->event_type != NO_ACTION ||
+ if (event_header->event_type != NO_ACTION ||
memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) {
size = 0;
goto out;
--
2.7.4
Andy, Kees,
On Wed, Apr 16, 2025 at 09:45:52AM +0300, Andy Shevchenko wrote:
> On Tue, Apr 15, 2025 at 04:14:24PM -0700, Kees Cook wrote:
> > The 20 byte length of struct platform_device_id::name is not long enough
> > for many devices (especially regulators), where the string initialization
> > is getting truncated and missing the trailing NUL byte. This is seen
> > with GCC 15's -Wunterminated-string-initialization option:
> >
> > drivers/regulator/hi6421v530-regulator.c:189:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 189 | { .name = "hi6421v530-regulator" },
> > | ^~~~~~~~~~~~~~~~~~~~~~
> > drivers/regulator/hi6421v600-regulator.c:278:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 278 | { .name = "hi6421v600-regulator" },
> > | ^~~~~~~~~~~~~~~~~~~~~~
> > drivers/regulator/lp87565-regulator.c:233:11: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 233 | { "lp87565-q1-regulator", },
> > | ^~~~~~~~~~~~~~~~~~~~~~
> > sound/soc/fsl/imx-pcm-rpmsg.c:818:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 818 | { .name = "rpmsg-micfil-channel" },
> > | ^~~~~~~~~~~~~~~~~~~~~~
> > drivers/iio/light/hid-sensor-als.c:457:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 457 | .name = "HID-SENSOR-LISS-0041",
> > | ^~~~~~~~~~~~~~~~~~~~~~
> > drivers/iio/light/hid-sensor-prox.c:366:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
> > 366 | .name = "HID-SENSOR-LISS-0226",
> > | ^~~~~~~~~~~~~~~~~~~~~~
> >
> > Increase the length to 24, slightly more than is currently being used by
> > the affected drivers. The string is used in '%s' format strings and via
> > the module code, which appears to do its own length encoding. This size
> > was chosen because there was already a 4 byte hole in the structure:
> >
> > struct platform_device_id {
> > char name[20]; /* 0 20 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > kernel_ulong_t driver_data; /* 24 8 */
> >
> > /* size: 32, cachelines: 1, members: 2 */
> > /* sum members: 28, holes: 1, sum holes: 4 */
> > /* last cacheline: 32 bytes */
> > };
>
> Since there is no even potential ABI breakage, I'm fine with the change.
> Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
This definitely breaks the ABI on 32-bit architectures such as i586, where
there is no alignment gap to absorb the change. Perhaps this also makes the
commit unsuitable for backporting to stable branches?
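For comparison, a pahole-style sketch of the 32-bit layout (an illustration,
assuming a 4-byte kernel_ulong_t) shows there is no hole for the larger name
to grow into:
struct platform_device_id {
	char name[20];			/*  0	20 */
	kernel_ulong_t driver_data;	/* 20	 4 */

	/* size: 24, cachelines: 1, members: 2 */
};
Growing name[] to 24 bytes therefore shifts driver_data and increases the
structure size from 24 to 28, changing the module ABI.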
I recently stumbled on build failure on v5.10.239 for i586:
make: Entering directory '/usr/src/kernel-source-5.10'
DEPMOD 5.10.239
depmod: FATAL: Module index: bad character '�'=0x80 - only 7-bit ASCII is supported:
platform:jsl_rt5682_max98360ax�
make: *** [Makefile:1786: modules_install] Error 1
make: Leaving directory '/usr/src/kernel-source-5.10'
With this patch not applied, "jsl_rt5682_max98360a" has its terminating '\0'
truncated, because PLATFORM_NAME_SIZE is the same as the string length, and is
concatenated with the following binary data:
{
.name = "jsl_rt5682_max98360a",
.driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN |
SOF_RT5682_MCLK_24MHZ |
SOF_RT5682_SSP_CODEC(0) |
SOF_SPEAKER_AMP_PRESENT |
SOF_MAX98360A_SPEAKER_AMP_PRESENT |
SOF_RT5682_SSP_AMP(1)),
},
modpost then interprets it as an ASCIIZ string, concatenating it with
`driver_data` and producing the bad characters.
static int do_platform_entry(const char *filename,
void *symval, char *alias)
{
DEF_FIELD_ADDR(symval, platform_device_id, name);
sprintf(alias, PLATFORM_MODULE_PREFIX "%s", *name);
return 1;
}
This creates an incorrect alias, which somehow breaks depmod in kmod 34.2
(maybe earlier).
Old kmod 30 successfully adds incorrect alias:
$ modinfo snd-soc-sof_rt5682.ko | grep jsl_rt5682_max98360a
alias: platform:jsl_rt5682_max98360a
alias: platform:jsl_rt5682_max98360ax�
and
modules.alias:alias platform:jsl_rt5682_max98360ax� snd_soc_sof_rt5682
Perhaps, scripts/mod/file2alias.c should be updated with:
- sprintf(alias, PLATFORM_MODULE_PREFIX "%s", *name);
+ sprintf(alias, PLATFORM_MODULE_PREFIX "%.*s", PLATFORM_NAME_SIZE, *name);
(Or even producing an error if more serious truncation occurs.)
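A sketch of that stricter variant (hypothetical and untested, assuming
modpost's existing warn() helper) could be:
static int do_platform_entry(const char *filename,
			     void *symval, char *alias)
{
	DEF_FIELD_ADDR(symval, platform_device_id, name);

	/* Hypothetical check: diagnose names lacking a terminating NUL */
	if (strnlen(*name, PLATFORM_NAME_SIZE) == PLATFORM_NAME_SIZE)
		warn("%s: platform_device_id name is not NUL-terminated\n",
		     filename);
	/* Never read past the fixed-size name field */
	sprintf(alias, PLATFORM_MODULE_PREFIX "%.*s",
		PLATFORM_NAME_SIZE, *name);
	return 1;
}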
Thanks,
>
> --
> With Best Regards,
> Andy Shevchenko
>
>
The patch titled
Subject: mm/zsmalloc: do not pass __GFP_MOVABLE if CONFIG_COMPACTION=n
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-zsmalloc-do-not-pass-__gfp_movable-if-config_compaction=n.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Harry Yoo <harry.yoo(a)oracle.com>
Subject: mm/zsmalloc: do not pass __GFP_MOVABLE if CONFIG_COMPACTION=n
Date: Fri, 4 Jul 2025 19:30:53 +0900
Commit 48b4800a1c6a ("zsmalloc: page migration support") added support for
migrating zsmalloc pages using the movable_operations migration framework.
However, the commit did not take into account that zsmalloc supports
migration only when CONFIG_COMPACTION is enabled. Tracing shows that
zsmalloc was still passing the __GFP_MOVABLE flag even when compaction is
not supported.
This can result in unmovable pages being allocated from movable page
blocks (even without stealing page blocks), ZONE_MOVABLE and CMA area.
Possible user visible effects:
- Some ZONE_MOVABLE memory can be not actually movable
- CMA allocation can fail because of this
- Increased memory fragmentation due to ignoring the page mobility
grouping feature
I'm not really sure who uses kernels without compaction support, though :(
To fix this, clear the __GFP_MOVABLE flag when
!IS_ENABLED(CONFIG_COMPACTION).
Link: https://lkml.kernel.org/r/20250704103053.6913-1-harry.yoo@oracle.com
Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zsmalloc.c | 3 +++
1 file changed, 3 insertions(+)
--- a/mm/zsmalloc.c~mm-zsmalloc-do-not-pass-__gfp_movable-if-config_compaction=n
+++ a/mm/zsmalloc.c
@@ -1043,6 +1043,9 @@ static struct zspage *alloc_zspage(struc
if (!zspage)
return NULL;
+ if (!IS_ENABLED(CONFIG_COMPACTION))
+ gfp &= ~__GFP_MOVABLE;
+
zspage->magic = ZSPAGE_MAGIC;
zspage->pool = pool;
zspage->class = class->index;
_
Patches currently in -mm which might be from harry.yoo(a)oracle.com are
lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch
lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users-v3.patch
mm-zsmalloc-do-not-pass-__gfp_movable-if-config_compaction=n.patch
The patch titled
Subject: mm/shmem, swap: improve cached mTHP handling and fix potential hang
has been added to the -mm mm-unstable branch. Its filename is
mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kairui Song <kasong(a)tencent.com>
Subject: mm/shmem, swap: improve cached mTHP handling and fix potential hang
Date: Sat, 5 Jul 2025 02:17:40 +0800
Patch series "mm/shmem, swap: bugfix and improvement of mTHP swap-in", v4.
The current mTHP swapin path has several problems. It may potentially
hang, may cause redundant faults due to a false positive swap cache lookup,
and it involves at least 4 Xarray tree walks (get order, get order
again, confirm swap, insert folio). And for !CONFIG_TRANSPARENT_HUGEPAGE
builds, it performs some mTHP related checks.
This series fixes all of the mentioned issues, and the code should be more
robust and prepared for the swap table series. Tree walks are now reduced
to two (get order & confirm, insert folio), the !CONFIG_TRANSPARENT_HUGEPAGE
build overhead is minimized, and the code comes with a sanity check now.
The performance is slightly better after this series, sequential swap in
of 24G data from ZRAM, using transparent_hugepage_tmpfs=always (24 samples
each):
Before: 11.02, stddev: 0.06
After patch 1: 10.74, stddev: 0.03
After patch 2: 10.72, stddev: 0.01
After patch 3: 10.73, stddev: 0.04
After patch 4: 10.72, stddev: 0.02
After patch 5: 10.74, stddev: 0.01
After patch 6: 10.13, stddev: 0.09
After patch 7: 9.95, stddev: 0.02
After patch 8: 9.88, stddev: 0.04
Each patch improves the performance by a little, which is about ~10%
faster in total.
A kernel build test showed a very slight improvement, testing with make -j24
with defconfig in a 256M memcg, also using ZRAM as swap, and
transparent_hugepage_tmpfs=always (6 test runs):
Before: system time avg: 3911.80s
After: system time avg: 3863.76s
This patch (of 9):
The current swap-in code assumes that, when a swap entry in the shmem
mapping is order 0, its cached folios (if present) must be order 0 too,
which turns out to not always be correct.
The problem is that shmem_split_large_entry is called before verifying that
the folio will eventually be swapped in; one possible race is:
CPU1                                  CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
                                      shmem_swapin_folio
                                      /* S1 is swapped in */
                                      shmem_writeout
                                      /* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0, and
folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check will
always fail, causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing a larger folio to be
seen in the swap cache, and by checking that the whole shmem mapping range
covered by the swapin has the right swap value upon inserting the folio.
And drop the redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
And worth noting, it may seem the order and value check before inserting
might help reduce the lock contention, which is not true. The swap
cache layer ensures a raced swapin will either see a swap cache folio or
fail to do a swapin (we have the SWAP_HAS_CACHE bit even if swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Link: https://lkml.kernel.org/r/20250704181748.63181-1-ryncsn@gmail.com
Link: https://lkml.kernel.org/r/20250704181748.63181-2-ryncsn@gmail.com
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
--- a/mm/shmem.c~mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung
+++ a/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struc
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struc
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct ino
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct ino
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
_
Patches currently in -mm which might be from kasong(a)tencent.com are
mm-list_lru-refactor-the-locking-code.patch
mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung.patch
mm-shmem-swap-avoid-redundant-xarray-lookup-during-swapin.patch
mm-shmem-swap-tidy-up-thp-swapin-checks.patch
mm-shmem-swap-tidy-up-swap-entry-splitting.patch
mm-shmem-swap-avoid-false-positive-swap-cache-lookup.patch
mm-shmem-swap-never-use-swap-cache-and-readahead-for-swp_synchronous_io.patch
mm-shmem-swap-simplify-swapin-path-and-result-handling.patch
mm-shmem-swap-simplify-swap-entry-and-index-calculation-of-large-swapin.patch
mm-shmem-swap-fix-major-fault-counting.patch
From: Kairui Song <kasong(a)tencent.com>
The current swap-in code assumes that, when a swap entry in the shmem
mapping is order 0, its cached folios (if present) must be order 0 too,
which turns out to not always be correct.
The problem is that shmem_split_large_entry is called before verifying that
the folio will eventually be swapped in; one possible race is:
CPU1                                  CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
                                      shmem_swapin_folio
                                      /* S1 is swapped in */
                                      shmem_writeout
                                      /* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0, and
folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check will
always fail, causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing a larger folio to be
seen in the swap cache, and by checking that the whole shmem mapping range
covered by the swapin has the right swap value upon inserting the folio.
And drop the redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
And worth noting, it may seem the order and value check before inserting
might help reduce the lock contention, which is not true. The swap
cache layer ensures a raced swapin will either see a swap cache folio or
fail to do a swapin (we have the SWAP_HAS_CACHE bit even if swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 334b7b4a61a0..e3c9a1365ff4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
--
2.50.0