The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5f465c148c61e876b6d6eacd8e8e365f2d47758f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025063012-clammy-bluish-930d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5f465c148c61e876b6d6eacd8e8e365f2d47758f Mon Sep 17 00:00:00 2001
From: "Xin Li (Intel)" <xin(a)zytor.com>
Date: Fri, 20 Jun 2025 16:15:03 -0700
Subject: [PATCH] x86/traps: Initialize DR6 by writing its architectural reset
value
Initialize DR6 by writing its architectural reset value to avoid
incorrectly zeroing DR6 to clear DR6.BLD at boot time, which leads
to a false bus lock detected warning.
The Intel SDM says:
1) Certain debug exceptions may clear bits 0-3 of DR6.
2) BLD induced #DB clears DR6.BLD and any other debug exception
doesn't modify DR6.BLD.
3) RTM induced #DB clears DR6.RTM and any other debug exception
sets DR6.RTM.
To avoid confusion in identifying debug exceptions, debug handlers
should set DR6.BLD and DR6.RTM, and clear other DR6 bits before
returning.
The DR6 architectural reset value 0xFFFF0FF0, already defined as
macro DR6_RESERVED, satisfies these requirements, so just use it to
reinitialize DR6 whenever needed.
Since clear_all_debug_regs() no longer zeros all debug registers,
rename it to initialize_debug_regs() to better reflect its current
behavior.
Since debug_read_clear_dr6() no longer clears DR6, rename it to
debug_read_reset_dr6() to better reflect its current behavior.
Fixes: ebb1064e7c2e9 ("x86/traps: Handle #DB for bus lock")
Reported-by: Sohil Mehta <sohil.mehta@intel.com>
Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://lore.kernel.org/lkml/06e68373-a92b-472e-8fd9-ba548119770c@intel.com/
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250620231504.2676902-2-xin%40zytor.com
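For illustration, a minimal sketch of the active-low polarity handling described
above (the DR6_BLD/DR6_RTM macro names here are illustrative, not taken from the
patch):

	/* DR6 architectural reset value: bits 4-11 and 16-31 read as 1. */
	#define DR6_RESERVED	0xFFFF0FF0UL
	#define DR6_BLD		(1UL << 11)	/* active low with BLD support */
	#define DR6_RTM		(1UL << 16)	/* active low with RTM support */

	/* A BLD-induced #DB clears bit 11, so the CPU reports 0xFFFF07F0. */
	unsigned long dr6 = 0xFFFF07F0UL;

	dr6 ^= DR6_RESERVED;	/* flip to positive polarity: 0x00000800 */
	/*
	 * Now (dr6 & DR6_BLD) != 0 flags the bus lock, (dr6 & DR6_RTM) == 0
	 * means the #DB did not occur inside an RTM region, and all
	 * still-reserved bits read as 0.
	 */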
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 0007ba077c0c..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8feb8fd2957a..0f6c280a94f0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2243,20 +2243,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- /* Ignore db4, db5 */
- if ((i == 4) || (i == 5))
- continue;
-
- set_debugreg(0, i);
- }
+ /* Control register first -- to make sure everything is disabled. */
+ set_debugreg(0, 7);
+ set_debugreg(DR6_RESERVED, 6);
+ /* dr5 and dr4 don't exist */
+ set_debugreg(0, 3);
+ set_debugreg(0, 2);
+ set_debugreg(0, 1);
+ set_debugreg(0, 0);
}
#ifdef CONFIG_KGDB
@@ -2417,7 +2413,7 @@ void cpu_init(void)
load_mm_ldt(&init_mm);
- clear_all_debug_regs();
+ initialize_debug_regs();
dbg_restore_debug_regs();
doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c5c897a86418..36354b470590 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline unsigned long debug_read_clear_dr6(void)
+static __always_inline unsigned long debug_read_reset_dr6(void)
{
unsigned long dr6;
+ get_debugreg(dr6, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
/*
* The Intel SDM says:
*
- * Certain debug exceptions may clear bits 0-3. The remaining
- * contents of the DR6 register are never cleared by the
- * processor. To avoid confusion in identifying debug
- * exceptions, debug handlers should clear the register before
- * returning to the interrupted task.
+ * Certain debug exceptions may clear bits 0-3 of DR6.
*
- * Keep it simple: clear DR6 immediately.
+ * BLD induced #DB clears DR6.BLD and any other debug
+ * exception doesn't modify DR6.BLD.
+ *
+ * RTM induced #DB clears DR6.RTM and any other debug
+ * exception sets DR6.RTM.
+ *
+ * To avoid confusion in identifying debug exceptions,
+ * debug handlers should set DR6.BLD and DR6.RTM, and
+ * clear other DR6 bits before returning.
+ *
+ * Keep it simple: write DR6 with its architectural reset
+ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately.
*/
- get_debugreg(dr6, 6);
set_debugreg(DR6_RESERVED, 6);
- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -1239,13 +1247,13 @@ static noinstr void exc_debug_user(struct pt_regs *regs, unsigned long dr6)
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- exc_debug_kernel(regs, debug_read_clear_dr6());
+ exc_debug_kernel(regs, debug_read_reset_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- exc_debug_user(regs, debug_read_clear_dr6());
+ exc_debug_user(regs, debug_read_reset_dr6());
}
#ifdef CONFIG_X86_FRED
@@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
{
/*
* FRED #DB stores DR6 on the stack in the format which
- * debug_read_clear_dr6() returns for the IDT entry points.
+ * debug_read_reset_dr6() returns for the IDT entry points.
*/
unsigned long dr6 = fred_event_data(regs);
@@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6 = debug_read_clear_dr6();
+ unsigned long dr6 = debug_read_reset_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 5f465c148c61e876b6d6eacd8e8e365f2d47758f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025063008-shimmy-doorstep-3c89@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5f465c148c61e876b6d6eacd8e8e365f2d47758f Mon Sep 17 00:00:00 2001
From: "Xin Li (Intel)" <xin(a)zytor.com>
Date: Fri, 20 Jun 2025 16:15:03 -0700
Subject: [PATCH] x86/traps: Initialize DR6 by writing its architectural reset
value
Initialize DR6 by writing its architectural reset value to avoid
incorrectly zeroing DR6 to clear DR6.BLD at boot time, which leads
to a false bus lock detected warning.
The Intel SDM says:
1) Certain debug exceptions may clear bits 0-3 of DR6.
2) BLD induced #DB clears DR6.BLD and any other debug exception
doesn't modify DR6.BLD.
3) RTM induced #DB clears DR6.RTM and any other debug exception
sets DR6.RTM.
To avoid confusion in identifying debug exceptions, debug handlers
should set DR6.BLD and DR6.RTM, and clear other DR6 bits before
returning.
The DR6 architectural reset value 0xFFFF0FF0, already defined as
macro DR6_RESERVED, satisfies these requirements, so just use it to
reinitialize DR6 whenever needed.
Since clear_all_debug_regs() no longer zeros all debug registers,
rename it to initialize_debug_regs() to better reflect its current
behavior.
Since debug_read_clear_dr6() no longer clears DR6, rename it to
debug_read_reset_dr6() to better reflect its current behavior.
Fixes: ebb1064e7c2e9 ("x86/traps: Handle #DB for bus lock")
Reported-by: Sohil Mehta <sohil.mehta@intel.com>
Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://lore.kernel.org/lkml/06e68373-a92b-472e-8fd9-ba548119770c@intel.com/
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250620231504.2676902-2-xin%40zytor.com
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 0007ba077c0c..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8feb8fd2957a..0f6c280a94f0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2243,20 +2243,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- /* Ignore db4, db5 */
- if ((i == 4) || (i == 5))
- continue;
-
- set_debugreg(0, i);
- }
+ /* Control register first -- to make sure everything is disabled. */
+ set_debugreg(0, 7);
+ set_debugreg(DR6_RESERVED, 6);
+ /* dr5 and dr4 don't exist */
+ set_debugreg(0, 3);
+ set_debugreg(0, 2);
+ set_debugreg(0, 1);
+ set_debugreg(0, 0);
}
#ifdef CONFIG_KGDB
@@ -2417,7 +2413,7 @@ void cpu_init(void)
load_mm_ldt(&init_mm);
- clear_all_debug_regs();
+ initialize_debug_regs();
dbg_restore_debug_regs();
doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c5c897a86418..36354b470590 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline unsigned long debug_read_clear_dr6(void)
+static __always_inline unsigned long debug_read_reset_dr6(void)
{
unsigned long dr6;
+ get_debugreg(dr6, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
/*
* The Intel SDM says:
*
- * Certain debug exceptions may clear bits 0-3. The remaining
- * contents of the DR6 register are never cleared by the
- * processor. To avoid confusion in identifying debug
- * exceptions, debug handlers should clear the register before
- * returning to the interrupted task.
+ * Certain debug exceptions may clear bits 0-3 of DR6.
*
- * Keep it simple: clear DR6 immediately.
+ * BLD induced #DB clears DR6.BLD and any other debug
+ * exception doesn't modify DR6.BLD.
+ *
+ * RTM induced #DB clears DR6.RTM and any other debug
+ * exception sets DR6.RTM.
+ *
+ * To avoid confusion in identifying debug exceptions,
+ * debug handlers should set DR6.BLD and DR6.RTM, and
+ * clear other DR6 bits before returning.
+ *
+ * Keep it simple: write DR6 with its architectural reset
+ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately.
*/
- get_debugreg(dr6, 6);
set_debugreg(DR6_RESERVED, 6);
- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -1239,13 +1247,13 @@ static noinstr void exc_debug_user(struct pt_regs *regs, unsigned long dr6)
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- exc_debug_kernel(regs, debug_read_clear_dr6());
+ exc_debug_kernel(regs, debug_read_reset_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- exc_debug_user(regs, debug_read_clear_dr6());
+ exc_debug_user(regs, debug_read_reset_dr6());
}
#ifdef CONFIG_X86_FRED
@@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
{
/*
* FRED #DB stores DR6 on the stack in the format which
- * debug_read_clear_dr6() returns for the IDT entry points.
+ * debug_read_reset_dr6() returns for the IDT entry points.
*/
unsigned long dr6 = fred_event_data(regs);
@@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6 = debug_read_clear_dr6();
+ unsigned long dr6 = debug_read_reset_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x a3f3040657417aeadb9622c629d4a0c2693a0f93
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025063041-arrival-closable-5d15@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a3f3040657417aeadb9622c629d4a0c2693a0f93 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <avadhut.naik@amd.com>
Date: Thu, 29 May 2025 20:50:04 +0000
Subject: [PATCH] EDAC/amd64: Fix size calculation for Non-Power-of-Two DIMMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Each Chip-Select (CS) of a Unified Memory Controller (UMC) on AMD Zen-based
SOCs has an Address Mask and a Secondary Address Mask register associated with
it. The amd64_edac module logs DIMM sizes on a per-UMC per-CS granularity
during init using these two registers.
Currently, the module primarily considers only the Address Mask register for
computing DIMM sizes. The Secondary Address Mask register is only considered
for odd CS. Additionally, if it has been considered, the Address Mask register
is ignored altogether for that CS. For power-of-two DIMMs, i.e. DIMMs whose
total capacity is a power of two (32GB, 64GB, etc.), this is not an issue,
since only the Address Mask register is used.
For non-power-of-two DIMMs, i.e. DIMMs whose total capacity is not a power of
two (48GB, 96GB, etc.), the Secondary Address Mask register is used in
conjunction with the Address Mask register. But since the module only
considers one of the two registers for a given CS, the size it computes is
incorrect: the Secondary Address Mask register is not considered for even CS,
and the Address Mask register is not considered for odd CS.
Introduce a new helper function so that both Address Mask and Secondary
Address Mask registers are considered, when valid, for computing DIMM sizes.
Furthermore, also rename some variables for greater clarity.
Fixes: 81f5090db843 ("EDAC/amd64: Support asymmetric dual-rank DIMMs")
Closes: https://lore.kernel.org/dbec22b6-00f2-498b-b70d-ab6f8a5ec87e@natrix.lt
Reported-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Tested-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250529205013.403450-1-avadhut.naik@amd.com
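For illustration, a worked example of the size math in calculate_cs_size() from
the hunk below, using hypothetical mask values:

	/*
	 * Register [31:1] = Address [39:9], so size in kB = (mask >> 2) + 1.
	 * A 24GB chip select built from a 16GB primary rank plus an 8GB
	 * secondary rank, no 3R interleaving:
	 *
	 *   primary   = GENMASK(25, 1)  ->  (mask >> 2) + 1 = 16 GiB in kB
	 *   secondary = GENMASK(24, 1)  ->  (mask >> 2) + 1 =  8 GiB in kB
	 *
	 * The old code picked only one of the two masks and reported 16GB
	 * or 8GB; the new code sums both: 16GB + 8GB = 24GB.
	 */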
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index b681c0663203..07f1e9dc1ca7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
/* Return size in MBs. */
return size >> 10;
@@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
+ u32 addr_mask = 0, addr_mask_sec = 0;
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
+
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a3f3040657417aeadb9622c629d4a0c2693a0f93
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025063036-washroom-swiftness-1860@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a3f3040657417aeadb9622c629d4a0c2693a0f93 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <avadhut.naik@amd.com>
Date: Thu, 29 May 2025 20:50:04 +0000
Subject: [PATCH] EDAC/amd64: Fix size calculation for Non-Power-of-Two DIMMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Each Chip-Select (CS) of a Unified Memory Controller (UMC) on AMD Zen-based
SOCs has an Address Mask and a Secondary Address Mask register associated with
it. The amd64_edac module logs DIMM sizes on a per-UMC per-CS granularity
during init using these two registers.
Currently, the module primarily considers only the Address Mask register for
computing DIMM sizes. The Secondary Address Mask register is only considered
for odd CS. Additionally, if it has been considered, the Address Mask register
is ignored altogether for that CS. For power-of-two DIMMs, i.e. DIMMs whose
total capacity is a power of two (32GB, 64GB, etc.), this is not an issue,
since only the Address Mask register is used.
For non-power-of-two DIMMs, i.e. DIMMs whose total capacity is not a power of
two (48GB, 96GB, etc.), the Secondary Address Mask register is used in
conjunction with the Address Mask register. But since the module only
considers one of the two registers for a given CS, the size it computes is
incorrect: the Secondary Address Mask register is not considered for even CS,
and the Address Mask register is not considered for odd CS.
Introduce a new helper function so that both Address Mask and Secondary
Address Mask registers are considered, when valid, for computing DIMM sizes.
Furthermore, also rename some variables for greater clarity.
Fixes: 81f5090db843 ("EDAC/amd64: Support asymmetric dual-rank DIMMs")
Closes: https://lore.kernel.org/dbec22b6-00f2-498b-b70d-ab6f8a5ec87e@natrix.lt
Reported-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Tested-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250529205013.403450-1-avadhut.naik@amd.com
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index b681c0663203..07f1e9dc1ca7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
/* Return size in MBs. */
return size >> 10;
@@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
+ u32 addr_mask = 0, addr_mask_sec = 0;
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
+
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a3f3040657417aeadb9622c629d4a0c2693a0f93
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025063027-delighted-selection-2dde@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a3f3040657417aeadb9622c629d4a0c2693a0f93 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <avadhut.naik@amd.com>
Date: Thu, 29 May 2025 20:50:04 +0000
Subject: [PATCH] EDAC/amd64: Fix size calculation for Non-Power-of-Two DIMMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Each Chip-Select (CS) of a Unified Memory Controller (UMC) on AMD Zen-based
SOCs has an Address Mask and a Secondary Address Mask register associated with
it. The amd64_edac module logs DIMM sizes on a per-UMC per-CS granularity
during init using these two registers.
Currently, the module primarily considers only the Address Mask register for
computing DIMM sizes. The Secondary Address Mask register is only considered
for odd CS. Additionally, if it has been considered, the Address Mask register
is ignored altogether for that CS. For power-of-two DIMMs, i.e. DIMMs whose
total capacity is a power of two (32GB, 64GB, etc.), this is not an issue,
since only the Address Mask register is used.
For non-power-of-two DIMMs, i.e. DIMMs whose total capacity is not a power of
two (48GB, 96GB, etc.), the Secondary Address Mask register is used in
conjunction with the Address Mask register. But since the module only
considers one of the two registers for a given CS, the size it computes is
incorrect: the Secondary Address Mask register is not considered for even CS,
and the Address Mask register is not considered for odd CS.
Introduce a new helper function so that both Address Mask and Secondary
Address Mask registers are considered, when valid, for computing DIMM sizes.
Furthermore, also rename some variables for greater clarity.
Fixes: 81f5090db843 ("EDAC/amd64: Support asymmetric dual-rank DIMMs")
Closes: https://lore.kernel.org/dbec22b6-00f2-498b-b70d-ab6f8a5ec87e@natrix.lt
Reported-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Tested-by: Žilvinas Žaltiena <zilvinas@natrix.lt>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250529205013.403450-1-avadhut.naik@amd.com
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index b681c0663203..07f1e9dc1ca7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
/* Return size in MBs. */
return size >> 10;
@@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
+ u32 addr_mask = 0, addr_mask_sec = 0;
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
+
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
From: Jakob Unterwurzacher <jakob.unterwurzacher@cherry.de>
Hardware CS has a very slow rise time of about 6us,
causing transmission errors when CS does not reach
high between transactions.
It looks like it's not driven actively when transitioning
from low to high but switched to input, so only the CPU
pull-up pulls it high, slowly. Transitions from high to low
are fast. On the oscilloscope, CS looks like an irregular sawtooth
pattern like this:
                         _____
       ^                /     |
      ^      /|        /      |
     /|     / |       /       |
    / |    /  |      /        |
___/  |___/   |_____/         |___
With cs-gpios we have a CS rise time of about 20ns, as it should be,
and CS looks rectangular.
This fixes the data errors when running a flashcp loop against a
m25p40 spi flash.
With the Rockchip 6.1 kernel we see the same slow rise time, but
for some reason CS always stays high long enough to reach a solid
high.
The RK3399 and RK3588 SoCs use the same SPI driver, so we also
checked our "Puma" (RK3399) and "Tiger" (RK3588) boards.
They do not have this problem. Hardware CS rise time is good.
Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard")
Cc: stable@vger.kernel.org
Reviewed-by: Quentin Schulz <quentin.schulz@cherry.de>
Signed-off-by: Jakob Unterwurzacher <jakob.unterwurzacher@cherry.de>
---
v2:
* Rename spi1_csn0_gpio -> spi1_csn0_gpio_pin to make CHECK_DTBS=y happy
* Add pinctrl-names = "default";
v1: https://lore.kernel.org/all/20250620113549.2900285-1-jakob.unterwurzacher@c…
.../boot/dts/rockchip/px30-ringneck.dtsi | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index ab232e5c7ad6..4203b335a263 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -379,6 +379,18 @@ pmic_int: pmic-int {
<0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
+
+ spi1 {
+ spi1_csn0_gpio_pin: spi1-csn0-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+
+ spi1_csn1_gpio_pin: spi1-csn1-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+ };
};
&pmu_io_domains {
@@ -396,6 +408,17 @@ &sdmmc {
vqmmc-supply = <&vccio_sd>;
};
+&spi1 {
+ /*
+ * Hardware CS has a very slow rise time of about 6us,
+ * causing transmission errors.
+ * With cs-gpios we have a rise time of about 20ns.
+ */
+ cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>;
+};
+
&tsadc {
status = "okay";
};
--
2.39.5
The grp->bb_largest_free_order is updated regardless of whether
mb_optimize_scan is enabled. This can lead to inconsistencies between
grp->bb_largest_free_order and the actual s_mb_largest_free_orders list
index when mb_optimize_scan is repeatedly enabled and disabled via remount.
For example, if mb_optimize_scan is initially enabled, largest free
order is 3, and the group is in s_mb_largest_free_orders[3]. Then,
mb_optimize_scan is disabled via remount, block allocations occur,
updating largest free order to 2. Finally, mb_optimize_scan is re-enabled
via remount, more block allocations update largest free order to 1.
At this point, the group would be removed from s_mb_largest_free_orders[3]
under the protection of s_mb_largest_free_orders_locks[2]. This lock
mismatch can lead to list corruption.
To fix this, a new field bb_largest_free_order_idx is added to struct
ext4_group_info to explicitly track the list index. bb_largest_free_order is
still updated unconditionally, but bb_largest_free_order_idx is updated only
when mb_optimize_scan is enabled, so that there is no inconsistency between
the lock taken and the data it protects.
Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
CC: stable@vger.kernel.org
Signed-off-by: Baokun Li <libaokun1@huawei.com>
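For illustration, the invariant the patch restores, sketched with the names
used in the hunk below:

	/*
	 * A group on sbi->s_mb_largest_free_orders[i] may only be moved
	 * while holding s_mb_largest_free_orders_locks[i] for that same i.
	 * bb_largest_free_order_idx records i, so removal always pairs the
	 * right lock with the right list even if bb_largest_free_order
	 * drifted while mb_optimize_scan was off:
	 */
	old = grp->bb_largest_free_order_idx;	/* list the group is on */
	if (old >= 0) {
		write_lock(&sbi->s_mb_largest_free_orders_locks[old]);
		list_del_init(&grp->bb_largest_free_order_node);
		write_unlock(&sbi->s_mb_largest_free_orders_locks[old]);
	}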
---
fs/ext4/ext4.h | 1 +
fs/ext4/mballoc.c | 35 ++++++++++++++++-------------------
2 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 003b8d3726e8..0e574378c6a3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3476,6 +3476,7 @@ struct ext4_group_info {
int bb_avg_fragment_size_order; /* order of average
fragment in BG */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
+ ext4_grpblk_t bb_largest_free_order_idx; /* index of largest frag */
ext4_group_t bb_group; /* Group number */
struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e6d6c2da3c6e..dc82124f0905 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1152,33 +1152,29 @@ static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int i;
+ int new, old = grp->bb_largest_free_order_idx;
- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
- if (grp->bb_counters[i] > 0)
+ for (new = MB_NUM_ORDERS(sb) - 1; new >= 0; new--)
+ if (grp->bb_counters[new] > 0)
break;
+
+ grp->bb_largest_free_order = new;
/* No need to move between order lists? */
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
- i == grp->bb_largest_free_order) {
- grp->bb_largest_free_order = i;
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || new == old)
return;
- }
- if (grp->bb_largest_free_order >= 0) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ if (old >= 0) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[old]);
list_del_init(&grp->bb_largest_free_order_node);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ write_unlock(&sbi->s_mb_largest_free_orders_locks[old]);
}
- grp->bb_largest_free_order = i;
- if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+
+ grp->bb_largest_free_order_idx = new;
+ if (new >= 0 && grp->bb_free) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[new]);
list_add_tail(&grp->bb_largest_free_order_node,
- &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ &sbi->s_mb_largest_free_orders[new]);
+ write_unlock(&sbi->s_mb_largest_free_orders_locks[new]);
}
}
@@ -3391,6 +3387,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */
+ meta_group_info[i]->bb_largest_free_order_idx = -1; /* uninit */
meta_group_info[i]->bb_group = group;
mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
--
2.46.1
The ufs-exynos driver enables the shareability option for gs101, which
means the descriptors need to be allocated as cacheable.
Fix the DT node and update the bindings to add the dma-coherent property.
This fixes the UFS stability issues we have seen with the upstream
UFS driver.
Note this DT fix can go in independently of the other UFS fixes series
I sent recently [1], as the bootloader already leaves the shareability
bits enabled.
regards,
Peter
[1] https://lore.kernel.org/linux-scsi/20250226220414.343659-1-peter.griffin@li…
To: André Draszik <andre.draszik@linaro.org>
To: Tudor Ambarus <tudor.ambarus@linaro.org>
To: Rob Herring <robh@kernel.org>
To: Krzysztof Kozlowski <krzk+dt@kernel.org>
To: Conor Dooley <conor+dt@kernel.org>
To: Alim Akhtar <alim.akhtar@samsung.com>
To: Avri Altman <avri.altman@wdc.com>
To: Bart Van Assche <bvanassche@acm.org>
To: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-samsung-soc@vger.kernel.org
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: kernel-team@android.com
Cc: willmcvicker@google.com
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
---
Peter Griffin (2):
arm64: dts: exynos: gs101: ufs: add dma-coherent property
scsi: ufs: dt-bindings: exynos: add dma-coherent property for gs101
Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml | 2 ++
arch/arm64/boot/dts/exynos/google/gs101.dtsi | 1 +
2 files changed, 3 insertions(+)
---
base-commit: b323d8e7bc03d27dec646bfdccb7d1a92411f189
change-id: 20250314-ufs-dma-coherent-980f2467690d
Best regards,
--
Peter Griffin <peter.griffin(a)linaro.org>
Groups with no free blocks shouldn't be in any average fragment size list.
However, when all blocks in a group are allocated (i.e., bb_fragments or
bb_free is 0), we currently skip updating the average fragment size, which
means the group isn't removed from its previous s_mb_avg_fragment_size[old]
list.
This created "zombie" groups that were always skipped during traversal as
they couldn't satisfy any block allocation requests, negatively impacting
traversal efficiency.
Therefore, when a group becomes completely free, bb_avg_fragment_size_order
is now set to -1. If the old order was not -1, a removal operation is
performed; if the new order is not -1, an insertion is performed.
Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
CC: stable@vger.kernel.org
Signed-off-by: Baokun Li <libaokun1@huawei.com>
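For illustration, the -1 sentinel convention introduced in the hunk below,
with hypothetical counter values:

	/*
	 * bb_free = 0 (fully allocated): new = -1, so the group is removed
	 * from its old list and inserted nowhere.
	 * bb_free = 512, bb_fragments = 4: the average fragment is 128
	 * blocks, new = mb_avg_fragment_size_order(sb, 128), and the group
	 * moves to that list. Removal takes the old index's lock,
	 * insertion the new index's lock.
	 */
	new = grp->bb_fragments == 0 ? -1 :
	      mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);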
---
fs/ext4/mballoc.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 94950b07a577..e6d6c2da3c6e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -841,30 +841,30 @@ static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int new_order;
+ int new, old;
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
return;
- new_order = mb_avg_fragment_size_order(sb,
- grp->bb_free / grp->bb_fragments);
- if (new_order == grp->bb_avg_fragment_size_order)
+ old = grp->bb_avg_fragment_size_order;
+ new = grp->bb_fragments == 0 ? -1 :
+ mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
+ if (new == old)
return;
- if (grp->bb_avg_fragment_size_order != -1) {
- write_lock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
+ if (old >= 0) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
list_del(&grp->bb_avg_fragment_size_node);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
- }
- grp->bb_avg_fragment_size_order = new_order;
- write_lock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
- list_add_tail(&grp->bb_avg_fragment_size_node,
- &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
+ }
+
+ grp->bb_avg_fragment_size_order = new;
+ if (new >= 0) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
+ list_add_tail(&grp->bb_avg_fragment_size_node,
+ &sbi->s_mb_avg_fragment_size[new]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
+ }
}
/*
--
2.46.1
On Fri, Jun 27, 2025 at 4:04 PM Sasha Levin <sashal@kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> ASoC: codec: wcd9335: Convert to GPIO descriptors
>
> to the 6.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> asoc-codec-wcd9335-convert-to-gpio-descriptors.patch
> and it can be found in the queue-6.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@vger.kernel.org> know about it.
>
>
Why is this being backported to stable? It's not a fix, just
refactoring and updating to a more modern API.
Bart
If nvmet receives commands with metadata, there is a continuous memory leak
of the kmalloc-128 slab, or more precisely of bio->bi_integrity.
Since the [1] patch series, the integrity payload is no longer freed at
bio_end_io for submitter-owned integrity. It has to be freed explicitly.
After commit bf4c89fc8797 ("block: don't call bio_uninit from bio_endio"),
each user of bio_init has to call bio_uninit as well, otherwise the bio's
integrity payload is never freed. Nvmet uses bio_init for inline bios.
Uninit the inline bio to complete deallocation of the integrity payload.
[1] https://lore.kernel.org/all/20240702151047.1746127-1-hch@lst.de/
Cc: stable@vger.kernel.org # 6.11
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
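For illustration, a minimal sketch of the init/uninit pairing the fix
enforces, modeled on nvmet's inline-bio usage (the opf value is a
placeholder):

	struct bio *bio = &req->b.inline_bio;

	bio_init(bio, req->ns->bdev, req->inline_bvec,
		 ARRAY_SIZE(req->inline_bvec), opf);
	/* ... attach integrity payload, submit, wait for completion ... */
	nvmet_req_bio_put(req, bio);	/* with this fix, calls bio_uninit()
					 * on the inline bio, which frees
					 * bio->bi_integrity */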
---
drivers/nvme/target/nvmet.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index df69a9dee71c..51df72f5e89b 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
{
if (bio != &req->b.inline_bio)
bio_put(bio);
+ else
+ bio_uninit(bio);
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
--
2.25.1
alloc_tag_top_users() attempts to lock alloc_tag_cttype->mod_lock
even when the alloc_tag_cttype is not allocated because:
1) alloc tagging is disabled because mem profiling is disabled
(!alloc_tag_cttype)
2) alloc tagging is enabled, but not yet initialized (!alloc_tag_cttype)
3) alloc tagging is enabled, but failed initialization
(!alloc_tag_cttype or IS_ERR(alloc_tag_cttype))
In all cases, alloc_tag_cttype is not allocated, and therefore
alloc_tag_top_users() should not attempt to acquire the semaphore.
This leads to a crash on memory allocation failure by attempting to
acquire a non-existent semaphore:
Oops: general protection fault, probably for non-canonical address 0xdffffc000000001b: 0000 [#3] SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x00000000000000d8-0x00000000000000df]
CPU: 2 UID: 0 PID: 1 Comm: systemd Tainted: G D 6.16.0-rc2 #1 VOLUNTARY
Tainted: [D]=DIE
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:down_read_trylock+0xaa/0x3b0
Code: d0 7c 08 84 d2 0f 85 a0 02 00 00 8b 0d df 31 dd 04 85 c9 75 29 48 b8 00 00 00 00 00 fc ff df 48 8d 6b 68 48 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 88 02 00 00 48 3b 5b 68 0f 85 53 01 00 00 65 ff
RSP: 0000:ffff8881002ce9b8 EFLAGS: 00010016
RAX: dffffc0000000000 RBX: 0000000000000070 RCX: 0000000000000000
RDX: 000000000000001b RSI: 000000000000000a RDI: 0000000000000070
RBP: 00000000000000d8 R08: 0000000000000001 R09: ffffed107dde49d1
R10: ffff8883eef24e8b R11: ffff8881002cec20 R12: 1ffff11020059d37
R13: 00000000003fff7b R14: ffff8881002cec20 R15: dffffc0000000000
FS: 00007f963f21d940(0000) GS:ffff888458ca6000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f963f5edf71 CR3: 000000010672c000 CR4: 0000000000350ef0
Call Trace:
<TASK>
codetag_trylock_module_list+0xd/0x20
alloc_tag_top_users+0x369/0x4b0
__show_mem+0x1cd/0x6e0
warn_alloc+0x2b1/0x390
__alloc_frozen_pages_noprof+0x12b9/0x21a0
alloc_pages_mpol+0x135/0x3e0
alloc_slab_page+0x82/0xe0
new_slab+0x212/0x240
___slab_alloc+0x82a/0xe00
</TASK>
As David Wang points out, this issue became easier to trigger after commit
780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init").
Before the commit, the issue occurred only when allocating and initializing
alloc_tag_cttype failed, or if a memory allocation failed before
alloc_tag_init() was called. After the commit, it can be easily triggered
when memory profiling is compiled in but disabled at boot.
To properly determine whether alloc_tag_init() has been called and
its data structures initialized, verify that alloc_tag_cttype is a valid
pointer before acquiring the semaphore. If the variable is NULL or an error
value, it has not been properly initialized. In such a case, just skip
and do not attempt to acquire the semaphore.
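The faulting address in the oops above is consistent with this: with
alloc_tag_cttype == NULL, the trylock dereferences a small offset from 0,
roughly as sketched here (offset taken from the KASAN report, struct layout
assumed):

	down_read_trylock(&alloc_tag_cttype->mod_lock);
	/* == down_read_trylock((void *)0 + offsetof(struct codetag_type,
	 *    mod_lock)), i.e. an access near 0xd8 -- matching the KASAN
	 *    null-ptr-deref range 0xd8-0xdf reported above. */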
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202506181351.bba867dd-lkp@intel.com
Closes: https://lore.kernel.org/oe-lkp/202506131711.5b41931c-lkp@intel.com
Fixes: 780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init")
Fixes: 1438d349d16b ("lib: add memory allocations report in show_mem()")
Cc: stable@vger.kernel.org
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
@Suren: I did not add another pr_warn() because every error path in
alloc_tag_init() already has pr_err().
v2 -> v3:
- Added another Closes: tag (David)
- Moved the condition into a standalone if block for better readability
(Suren)
- Typo fix (Suren)
lib/alloc_tag.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 41ccfb035b7b..e9b33848700a 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -127,6 +127,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl
struct codetag_bytes n;
unsigned int i, nr = 0;
+ if (IS_ERR_OR_NULL(alloc_tag_cttype))
+ return 0;
+
if (can_sleep)
codetag_lock_module_list(alloc_tag_cttype, true);
else if (!codetag_trylock_module_list(alloc_tag_cttype))
--
2.43.0
From: Kairui Song <kasong@tencent.com>
The current swap-in code assumes that, when a swap entry in shmem mapping
is order 0, its cached folios (if present) must be order 0 too, which
turns out not always correct.
The problem is that shmem_split_large_entry is called before verifying that
the folio will eventually be swapped in. One possible race is:
CPU1 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
shmem_swapin_folio
/* S1 is swapped in */
shmem_writeout
/* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swap-in of S1 will hang: `xa_get_order` returns 0, but
folio lookup returns a folio with order > 0, so the
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check always
trips, causing swap-in to fail with -EEXIST.
This also looks fragile. So fix it up by allowing a larger folio to be seen
in the swap cache, and by checking that the whole shmem mapping range covered
by the swap-in holds the right swap values when inserting the folio. Also
drop the redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
Worth noting, it may seem that the order and value checks before insertion
help reduce the lock contention, but that is not true. The swap cache layer
ensures that a raced swap-in will either see a swap cache folio or fail to
do a swap-in (we have the SWAP_HAS_CACHE bit even if the swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: <stable@vger.kernel.org>
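For illustration, a worked example of the range check added to
shmem_add_to_page_cache() in the hunk below, with hypothetical swap values:

	/*
	 * Inserting an order-2 folio (nr = 4) that was swapped out as
	 * entry S with S.val = 100: the four mapping slots must hold the
	 * swap entries 100, 101, 102, 103. xas_for_each_conflict() visits
	 * each stored entry, and iter.val advances by 1 << xas_get_order()
	 * per entry, so after the loop iter.val - nr == swap.val holds iff
	 * the whole range matched. Any hole or foreign entry -> -EEXIST.
	 */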
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 334b7b4a61a0..e3c9a1365ff4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
--
2.50.0
The patch titled
Subject: mm/damon/core: handle damon_call_control as normal under kdmond deactivation
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: handle damon_call_control as normal under kdmond deactivation
Date: Sun, 29 Jun 2025 13:49:14 -0700
The DAMON sysfs interface internally uses damon_call() to update DAMON
parameters online, as users request. However, DAMON core cancels any
damon_call() requests while it is deactivated by DAMOS watermarks.
As a result, users cannot change DAMON parameters online while DAMON is
deactivated. Note that users can turn DAMON off and on with different
watermarks as a workaround. Since deactivated DAMON is nearly the same
as stopped DAMON, the workaround should cause no big problem. Anyway, a
bug is a bug.
There is no real good reason to cancel the damon_call() request under
DAMOS deactivation. Fix it by simply handling the request as normal,
rather than cancelling it in that situation.
Link: https://lkml.kernel.org/r/20250629204914.54114-1-sj@kernel.org
Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
--- a/mm/damon/core.c~mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation
+++ a/mm/damon/core.c
@@ -2355,9 +2355,8 @@ static void kdamond_usleep(unsigned long
*
* If there is a &struct damon_call_control request that registered via
* &damon_call() on @ctx, do or cancel the invocation of the function depending
- * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS
- * watermarks, or the kdamond is already out of the main loop and therefore
- * will be terminated.
+ * on @cancel. @cancel is set when the kdamond is already out of the main loop
+ * and therefore will be terminated.
*/
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
@@ -2405,7 +2404,7 @@ static int kdamond_wait_activation(struc
if (ctx->callback.after_wmarks_check &&
ctx->callback.after_wmarks_check(ctx))
break;
- kdamond_call(ctx, true);
+ kdamond_call(ctx, false);
damos_walk_cancel(ctx);
}
return -EBUSY;
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch
mm-damon-introduce-damon_stat-module.patch
mm-damon-introduce-damon_stat-module-fix.patch
mm-damon-stat-calculate-and-expose-estimated-memory-bandwidth.patch
mm-damon-stat-calculate-and-expose-idle-time-percentiles.patch
docs-admin-guide-mm-damon-add-damon_stat-usage-document.patch
mm-damon-paddr-use-alloc_migartion_target-with-no-migration-fallback-nodemask.patch
revert-mm-rename-alloc_demote_folio-to-alloc_migrate_folio.patch
revert-mm-make-alloc_demote_folio-externally-invokable-for-migration.patch
selftets-damon-add-a-test-for-memcg_path-leak.patch
mm-damon-sysfs-schemes-decouple-from-damos_quota_goal_metric.patch
mm-damon-sysfs-schemes-decouple-from-damos_action.patch
mm-damon-sysfs-schemes-decouple-from-damos_wmark_metric.patch
mm-damon-sysfs-schemes-decouple-from-damos_filter_type.patch
mm-damon-sysfs-decouple-from-damon_ops_id.patch
The DAMON sysfs interface internally uses damon_call() to update DAMON
parameters online, as users request. However, DAMON core cancels any
damon_call() requests while it is deactivated by DAMOS watermarks.
As a result, users cannot change DAMON parameters online while DAMON is
deactivated. Note that users can turn DAMON off and on with different
watermarks as a workaround. Since deactivated DAMON is nearly the same
as stopped DAMON, the workaround should cause no big problem. Anyway, a
bug is a bug.
There is no real good reason to cancel the damon_call() request under
DAMOS deactivation. Fix it by simply handling the request as normal,
rather than cancelling it in that situation.
Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()")
Cc: stable(a)vger.kernel.org # 6.14.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/core.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index b217e0120e09..bc2e58c1222d 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2355,9 +2355,8 @@ static void kdamond_usleep(unsigned long usecs)
*
* If there is a &struct damon_call_control request that registered via
* &damon_call() on @ctx, do or cancel the invocation of the function depending
- * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS
- * watermarks, or the kdamond is already out of the main loop and therefore
- * will be terminated.
+ * on @cancel. @cancel is set when the kdamond is already out of the main loop
+ * and therefore will be terminated.
*/
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
@@ -2405,7 +2404,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
if (ctx->callback.after_wmarks_check &&
ctx->callback.after_wmarks_check(ctx))
break;
- kdamond_call(ctx, true);
+ kdamond_call(ctx, false);
damos_walk_cancel(ctx);
}
return -EBUSY;
base-commit: 8f6082b6e60e05f9bcd5c39b19ede995a8975283
--
2.39.5
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 178b8ff66ff827c41b4fa105e9aabb99a0b5c537
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062920-conch-hypnotist-d63d@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 178b8ff66ff827c41b4fa105e9aabb99a0b5c537 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 26 Jun 2025 12:17:48 -0600
Subject: [PATCH] io_uring/kbuf: flag partial buffer mappings
A previous commit aborted mapping more buffers for a non-incremental ring
during bundle peeking, but depending on where in the process this peeking
happened, it would not necessarily prevent a retry by the user. That can
create gaps in the received/read data.
Add struct buf_sel_arg->partial_map, which can pass this information
back. The networking side can then map that to internal state and use it
to gate retry as well.
Since this necessitates a new flag, change io_sr_msg->retry to a
retry_flags member, and store both the retry and partial map condition
in there.
Cc: stable(a)vger.kernel.org
Fixes: 26ec15e4b0c1 ("io_uring/kbuf: don't truncate end buffer for multiple buffer peeks")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index ce95e3af44a9..f2d2cc319faa 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
if (len > arg->max_len) {
len = arg->max_len;
if (!(bl->flags & IOBL_INC)) {
+ arg->partial_map = 1;
if (iov != arg->iovs)
break;
buf->len = len;
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 5d83c7adc739..723d0361898e 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -58,7 +58,8 @@ struct buf_sel_arg {
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
- unsigned buf_group;
+ unsigned short buf_group;
+ unsigned short partial_map;
};
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/net.c b/io_uring/net.c
index 5c1e8c4ba468..43a43522f406 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,12 +75,17 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 buf_group;
- bool retry;
+ unsigned short retry_flags;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
};
+enum sr_retry_flags {
+ IO_SR_MSG_RETRY = 1,
+ IO_SR_MSG_PARTIAL_MAP = 2,
+};
+
/*
* Number of times we'll try and do receives if there's more data. If we
* exceed this limit, then add us to the back of the queue and retry from
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
sr->len = 0; /* get from the provided buffer */
}
@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
sr->len = READ_ONCE(sqe->len);
sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
- if (sr->retry)
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
* If more is available AND it was a full transfer, retry and
* append to this one
*/
- if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+ if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
!iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
sr->done_io += this_ret;
- sr->retry = true;
+ sr->retry_flags |= IO_SR_MSG_RETRY;
return false;
}
} else {
@@ -1082,6 +1087,8 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
kmsg->vec.iovec = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
+ if (arg.partial_map)
+ sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
/* special case 1 vec, can be a fast path */
if (ret == 1) {
@@ -1276,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
zc->done_io = 0;
- zc->retry = false;
+ zc->retry_flags = 0;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
+ stable folks.
On Sun, Jun 29, 2025 at 11:15:13AM -0400, Dennis Clarke wrote:
>
> Code fix due to https://bugzilla.kernel.org/show_bug.cgi?id=220294
>
> The summary is that recent GCC versions (15.1.0 today) will assume C23
> spec compliance and get upset with ye old A20 address line code bits.
>
> This problem exists prior to the 6.12.35 release tarballs and is
> fixed with commit b3bee1e7c3f2b1b77182302c7b2131c804175870 applied.
>
> The newer GCC revisions will be quite popular soon enough and this may
> bite people when that happens.
I don't know what the rules are for building 6.1-stable with gcc-15 but if
they do that, then 6.1 will need to pick up the abovementioned commit:
b3bee1e7c3f2 ("x86/boot: Compile boot code with -std=gnu11 too")
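For context, a hypothetical minimal reproducer of this class of breakage
(not the actual boot code): pre-C23 sources that define their own boolean
type stop compiling once the compiler defaults to C23, where bool, true
and false are keywords.

/* builds with gcc -std=gnu11, fails with gcc-15's default -std=gnu23,
 * where "bool", "true" and "false" are keywords; the commit above keeps
 * the boot code on -std=gnu11 */
typedef enum { false, true } bool;

int main(void)
{
	bool on = true;
	return on ? 0 : 1;
}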
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f16873f42a06b620669d48a4b5c3f888cb3653a1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062931-astride-stoneware-df77@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f16873f42a06b620669d48a4b5c3f888cb3653a1 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld(a)intel.com>
Date: Fri, 6 Jun 2025 11:45:48 +0100
Subject: [PATCH] drm/xe: move DPT l2 flush to a more sensible place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only need the flush for DPT host updates here. Normal GGTT updates don't
need special flush.
Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v6.12+
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
Link: https://lore.kernel.org/r/20250606104546.1996818-4-matthew.auld@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
(cherry picked from commit 35db1da40c8cfd7511dc42f342a133601eb45449)
Signed-off-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index d918ae1c8061..55259969480b 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
vma->dpt = dpt;
vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return 0;
}
@@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
return vma;
err_unpin:
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 344ef45b03336e7f74658814f66483b5417c9cf1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062948-cape-pebble-cad9@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 344ef45b03336e7f74658814f66483b5417c9cf1 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Tue, 27 May 2025 11:36:50 +0800
Subject: [PATCH] mm/hugetlb: remove unnecessary holding of hugetlb_lock
In isolate_or_dissolve_huge_folio(), the hugetlb_lock is acquired only
for the purpose of obtaining the correct hstate, which is then passed to
alloc_and_dissolve_hugetlb_folio().
alloc_and_dissolve_hugetlb_folio() itself also acquires the hugetlb_lock.
We can have alloc_and_dissolve_hugetlb_folio() obtain the hstate by
itself, so that isolate_or_dissolve_huge_folio() no longer needs to
acquire the hugetlb_lock. In addition, we keep the folio_test_hugetlb()
check within isolate_or_dissolve_huge_folio(). By doing so, we can avoid
disrupting the normal path by vainly holding the hugetlb_lock.
replace_free_hugepage_folios() has the same issue, and we should address
it as well.
Addresses a possible performance problem which was added by the hotfix
113ed54ad276 ("mm/hugetlb: fix kernel NULL pointer dereference when
replacing free hugetlb folios").
Link: https://lkml.kernel.org/r/1748317010-16272-1-git-send-email-yangge1116@126.…
Fixes: 113ed54ad276 ("mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Suggested-by: Oscar Salvador <osalvador(a)suse.de>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8746ed2fec13..9dc95eac558c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2787,20 +2787,24 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
/*
* alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
* the old one
- * @h: struct hstate old page belongs to
* @old_folio: Old folio to dissolve
* @list: List to isolate the page in case we need to
* Returns 0 on success, otherwise negated error.
*/
-static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
- struct folio *old_folio, struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
+ struct list_head *list)
{
- gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
+ gfp_t gfp_mask;
+ struct hstate *h;
int nid = folio_nid(old_folio);
struct folio *new_folio = NULL;
int ret = 0;
retry:
+ /*
+ * The old_folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
spin_lock_irq(&hugetlb_lock);
if (!folio_test_hugetlb(old_folio)) {
/*
@@ -2829,8 +2833,10 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
cond_resched();
goto retry;
} else {
+ h = folio_hstate(old_folio);
if (!new_folio) {
spin_unlock_irq(&hugetlb_lock);
+ gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
NULL, NULL);
if (!new_folio)
@@ -2874,35 +2880,24 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
{
- struct hstate *h;
int ret = -EBUSY;
- /*
- * The page might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- * Return success when racing as if we dissolved the page ourselves.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
+ /* Not to disrupt normal path by vainly holding hugetlb_lock */
+ if (!folio_test_hugetlb(folio))
return 0;
- }
- spin_unlock_irq(&hugetlb_lock);
/*
* Fence off gigantic pages as there is a cyclic dependency between
* alloc_contig_range and them. Return -ENOMEM as this has the effect
* of bailing out right away without further retrying.
*/
- if (hstate_is_gigantic(h))
+ if (folio_order(folio) > MAX_PAGE_ORDER)
return -ENOMEM;
if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list))
ret = 0;
else if (!folio_ref_count(folio))
- ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
+ ret = alloc_and_dissolve_hugetlb_folio(folio, list);
return ret;
}
@@ -2916,7 +2911,6 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
*/
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
{
- struct hstate *h;
struct folio *folio;
int ret = 0;
@@ -2925,23 +2919,9 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
- /*
- * The folio might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
- start_pfn++;
- continue;
- }
- spin_unlock_irq(&hugetlb_lock);
-
- if (!folio_ref_count(folio)) {
- ret = alloc_and_dissolve_hugetlb_folio(h, folio,
- &isolate_list);
+ /* Not to disrupt normal path by vainly holding hugetlb_lock */
+ if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) {
+ ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list);
if (ret)
break;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 3a3c6d61577dbb23c09df3e21f6f9eda1ecd634b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062902-emphasize-calzone-a702@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a3c6d61577dbb23c09df3e21f6f9eda1ecd634b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Tue, 24 Jun 2025 14:40:34 +0100
Subject: [PATCH] io_uring/rsrc: don't rely on user vaddr alignment
There is no guaranteed alignment for user pointers; however, the
calculation of the offset of the first page into a folio after coalescing
uses some weird bit-mask logic. Get rid of it.
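As a worked example of the difference, here is a userspace sketch; the
constants and the address are made up, and only folio_shift and
first_folio_page_idx mirror the fields used in the diff:

#include <assert.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long vaddr = 0x7f1234502000UL + 0x123;	/* user pointer */
	unsigned int folio_shift = 21;			/* 2 MiB folio */
	unsigned long first_folio_page_idx = 3;		/* page 3 of the folio */

	/* old logic: assumes the vaddr's low bits encode the in-folio offset */
	unsigned long old_off = vaddr & ((1UL << folio_shift) - 1);

	/* fixed logic: in-page offset plus the first page's index in the folio */
	unsigned long new_off = (vaddr & ~PAGE_MASK) +
				(first_folio_page_idx << PAGE_SHIFT);

	assert(new_off == 3 * PAGE_SIZE + 0x123);
	assert(old_off != new_off);	/* old result tracks the user's vaddr
					 * alignment, not where the page sits
					 * in the folio */
	return 0;
}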
Cc: stable(a)vger.kernel.org
Reported-by: David Hildenbrand <david(a)redhat.com>
Fixes: a8edbb424b139 ("io_uring/rsrc: enable multi-hugepage buffer coalescing")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/io-uring/e387b4c78b33f231105a601d84eefd8301f57954.1…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 0c09e38784c9..afc67530f912 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -734,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
data->nr_pages_mid = folio_nr_pages(folio);
data->folio_shift = folio_shift(folio);
+ data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
/*
* Check if pages are contiguous inside a folio, and all folios have
@@ -827,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (coalesced)
imu->folio_shift = data.folio_shift;
refcount_set(&imu->refs, 1);
- off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
+
+ off = (unsigned long)iov->iov_base & ~PAGE_MASK;
+ if (coalesced)
+ off += data.first_folio_page_idx << PAGE_SHIFT;
+
node->buf = imu;
ret = 0;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 0d2138f16322..25e7e998dcfd 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -49,6 +49,7 @@ struct io_imu_folio_data {
unsigned int nr_pages_mid;
unsigned int folio_shift;
unsigned int nr_folios;
+ unsigned long first_folio_page_idx;
};
bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
From: Edip Hazuri <edip(a)medip.dev>
The mute LED on this laptop uses ALC245 but requires a quirk to work.
This patch enables the existing quirk for the device.
Tested on my friend's Victus 15-fb2xxx laptop. The LED behaviour works as intended.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Edip Hazuri <edip(a)medip.dev>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5d6d01ecf..a33e8a654 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10881,6 +10881,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS),
SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED),
--
2.50.0
Commit 61440628a4ff ("usb: dwc3: gadget: Cleanup SG handling") updated
the TRB reclaim path to use the TRB CHN (Chain) bit to determine whether
a TRB was part of a chain. However, this inadvertently changed the
behavior of reclaiming the final TRB in some scatter-gather or short
transfer cases.
In particular, if the final TRB did not have the CHN bit set, the
cleanup path could incorrectly skip clearing the HWO (Hardware Own)
bit, leaving stale TRBs in the ring. This resulted in broken data
transfer completions in userspace, notably for MTP over FunctionFS.
Fix this by unconditionally clearing the HWO bit during TRB reclaim,
regardless of the CHN bit state. This restores correct behavior
especially for transfers that require ZLPs or end on non-CHN TRBs.
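For illustration, a toy model of the old and new reclaim rules; the flag
bits and names only loosely follow the driver, and this is a sketch of
the behavioural change, not the driver code:

#include <assert.h>
#include <stdint.h>

#define TRB_CTRL_HWO	(1u << 0)	/* hardware owns the TRB */
#define TRB_CTRL_CHN	(1u << 2)	/* TRB is part of a chain */

struct trb { uint32_t ctrl; };

/* old rule: only chained TRBs had HWO cleared on reclaim */
static void reclaim_old(struct trb *trb)
{
	if ((trb->ctrl & TRB_CTRL_CHN) && (trb->ctrl & TRB_CTRL_HWO))
		trb->ctrl &= ~TRB_CTRL_HWO;
}

/* fixed rule: always hand a completed TRB back to software */
static void reclaim_new(struct trb *trb)
{
	if (trb->ctrl & TRB_CTRL_HWO)
		trb->ctrl &= ~TRB_CTRL_HWO;
}

int main(void)
{
	struct trb last = { .ctrl = TRB_CTRL_HWO };	/* final TRB, no CHN */

	reclaim_old(&last);
	assert(last.ctrl & TRB_CTRL_HWO);	/* stale: still hardware-owned */

	reclaim_new(&last);
	assert(!(last.ctrl & TRB_CTRL_HWO));	/* properly reclaimed */
	return 0;
}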
Fixes: 61440628a4ff ("usb: dwc3: gadget: Cleanup SG handling")
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Johannes Schneider <johannes.schneider(a)leica-geosystems.com>
Cc: <stable(a)vger.kernel.org> # v6.13
---
Changes in v4:
- None, patch content is the same
- re-assembled into a patch-series, and re-submission to solve b4 troubles
- Link to v3:
1. https://lore.kernel.org/all/AM8PR06MB7521A29A8863C838B54987B6BC7BA@AM8PR06M…
2. https://lore.kernel.org/all/AM8PR06MB752168CCAF31023017025DD5BC7BA@AM8PR06M…
Changes in v3:
- re-submission as singular patch
- Link to v2: https://lore.kernel.org/r/20250624-dwc3-fix-gadget-mtp-v2-0-0e2d9979328f@le…
Changes in v2:
- None, resubmission as separate patches
- dropped Patch 3, as it did change the logic
- CC to stable
- Link to v1: https://lore.kernel.org/r/20250621-dwc3-fix-gadget-mtp-v1-0-a45e6def71bb@le…
---
drivers/usb/dwc3/gadget.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 321361288935..99fbd29d8f46 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3516,7 +3516,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
* We're going to do that here to avoid problems of HW trying
* to use bogus TRBs for transfers.
*/
- if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+ if (trb->ctrl & DWC3_TRB_CTRL_HWO)
trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
/*
--
2.34.1
From: Maud Spierings <maudspierings(a)gocontroll.com>
Throughout the various probe functions, &indio_dev->dev is used before it
is initialized. This caused a kernel panic in st_sensors_power_enable()
when the call to devm_regulator_bulk_get_enable() failed and
dev_err_probe() was then called with the uninitialized device.
This seems to cause a panic only with dev_err_probe(); dev_err(),
dev_warn() and dev_info() don't seem to cause a panic, but they are fixed
as well.
The issue is reported and traced here: [1]
Link: https://lore.kernel.org/all/AM7P189MB100986A83D2F28AF3FFAF976E39EA@AM7P189M… [1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Maud Spierings <maudspierings(a)gocontroll.com>
---
When I search for general &indio_dev->dev usage, I see quite a lot more
hits, but I am not sure if there are issues with those too.
This issue has existed for a long time, it seems, and therefore it is
nearly impossible to find a proper Fixes tag. I would love to see it at
least backported to 6.12, as that is where I encountered it, and I
believe the patch should apply without conflicts.
---
Changes in v4:
- Put the link to the original issue in a proper link tag
- Remove stray newline
- Link to v3: https://lore.kernel.org/r/20250526-st_iio_fix-v3-1-039fec38707c@gocontroll.…
Changes in v3:
- Added the stable cc to the commit message
- Move the link to the original issue to the commit message
- Fix function notation in the commit message
- Move some more of the dev_*() calls to one line
- Link to v2: https://lore.kernel.org/r/20250522-st_iio_fix-v2-1-07a32655a996@gocontroll.…
Changes in v2:
- Added SoB in commit message
- Link to v1: https://lore.kernel.org/r/20250522-st_iio_fix-v1-1-d689b35f1612@gocontroll.…
---
drivers/iio/accel/st_accel_core.c | 10 +++---
drivers/iio/common/st_sensors/st_sensors_core.c | 36 ++++++++++------------
drivers/iio/common/st_sensors/st_sensors_trigger.c | 20 ++++++------
3 files changed, 31 insertions(+), 35 deletions(-)
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 99cb661fabb2d9cc1943fa8d0a6f3becb71126e6..a7961c610ed203d039bbf298c8883031a578fb0b 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
union acpi_object *ont;
union acpi_object *elements;
acpi_status status;
+ struct device *parent = indio_dev->dev.parent;
int ret = -EINVAL;
unsigned int val;
int i, j;
@@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
};
- adev = ACPI_COMPANION(indio_dev->dev.parent);
+ adev = ACPI_COMPANION(parent);
if (!adev)
return -ENXIO;
@@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
if (status == AE_NOT_FOUND) {
return -ENXIO;
} else if (ACPI_FAILURE(status)) {
- dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
- status);
+ dev_warn(parent, "failed to execute _ONT: %d\n", status);
return status;
}
@@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
}
ret = 0;
- dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+ dev_info(parent, "computed mount matrix from ACPI\n");
out:
kfree(buffer.pointer);
if (ret)
- dev_dbg(&indio_dev->dev,
+ dev_dbg(parent,
"failed to apply ACPI orientation data: %d\n", ret);
return ret;
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8ce1dccfea4f5aaff45d3d40f6542323dd1f0b09..dac593be56958fd0be92e13f628350fcfd0f040d 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
return err;
st_accel_set_fullscale_error:
- dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+ dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n");
return err;
}
@@ -231,8 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
ARRAY_SIZE(regulator_names),
regulator_names);
if (err)
- return dev_err_probe(&indio_dev->dev, err,
- "unable to enable supplies\n");
+ return dev_err_probe(parent, err, "unable to enable supplies\n");
return 0;
}
@@ -241,13 +240,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS");
static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
/* Sensor does not support interrupts */
if (!sdata->sensor_settings->drdy_irq.int1.addr &&
!sdata->sensor_settings->drdy_irq.int2.addr) {
if (pdata->drdy_int_pin)
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"DRDY on pin INT%d specified, but sensor does not support interrupts\n",
pdata->drdy_int_pin);
return 0;
@@ -256,29 +256,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
switch (pdata->drdy_int_pin) {
case 1:
if (!sdata->sensor_settings->drdy_irq.int1.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT1 not available.\n");
+ dev_err(parent, "DRDY on INT1 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 1;
break;
case 2:
if (!sdata->sensor_settings->drdy_irq.int2.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT2 not available.\n");
+ dev_err(parent, "DRDY on INT2 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 2;
break;
default:
- dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n");
+ dev_err(parent, "DRDY on pdata not valid.\n");
return -EINVAL;
}
if (pdata->open_drain) {
if (!sdata->sensor_settings->drdy_irq.int1.addr_od &&
!sdata->sensor_settings->drdy_irq.int2.addr_od)
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"open drain requested but unsupported.\n");
else
sdata->int_pin_open_drain = true;
@@ -336,6 +334,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS");
int st_sensors_init_sensor(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
struct st_sensors_platform_data *of_pdata;
int err = 0;
@@ -343,7 +342,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mutex_init(&sdata->odr_lock);
/* If OF/DT pdata exists, it will take precedence of anything else */
- of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ of_pdata = st_sensors_dev_probe(parent, pdata);
if (IS_ERR(of_pdata))
return PTR_ERR(of_pdata);
if (of_pdata)
@@ -370,7 +369,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
if (err < 0)
return err;
} else
- dev_info(&indio_dev->dev, "Full-scale not possible\n");
+ dev_info(parent, "Full-scale not possible\n");
err = st_sensors_set_odr(indio_dev, sdata->odr);
if (err < 0)
@@ -405,7 +404,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mask = sdata->sensor_settings->drdy_irq.int2.mask_od;
}
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"set interrupt line to open drain mode on pin %d\n",
sdata->drdy_int_pin);
err = st_sensors_write_data_with_mask(indio_dev, addr,
@@ -593,21 +592,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS");
int st_sensors_verify_id(struct iio_dev *indio_dev)
{
struct st_sensor_data *sdata = iio_priv(indio_dev);
+ struct device *parent = indio_dev->dev.parent;
int wai, err;
if (sdata->sensor_settings->wai_addr) {
err = regmap_read(sdata->regmap,
sdata->sensor_settings->wai_addr, &wai);
if (err < 0) {
- dev_err(&indio_dev->dev,
- "failed to read Who-Am-I register.\n");
- return err;
+ return dev_err_probe(parent, err,
+ "failed to read Who-Am-I register.\n");
}
if (sdata->sensor_settings->wai != wai) {
- dev_warn(&indio_dev->dev,
- "%s: WhoAmI mismatch (0x%x).\n",
- indio_dev->name, wai);
+ dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n",
+ indio_dev->name, wai);
}
}
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 9d4bf822a15dfcdd6c2835f6b9d7698cd3cb0b08..8a8ab688d7980f6dd43c660f90a0eba32c38388b 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
indio_dev->name);
if (sdata->trig == NULL) {
- dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+ dev_err(parent, "failed to allocate iio trigger.\n");
return -ENOMEM;
}
@@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
if (!sdata->sensor_settings->drdy_irq.addr_ihl) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n");
if (irq_trig == IRQF_TRIGGER_FALLING)
irq_trig = IRQF_TRIGGER_RISING;
@@ -156,21 +156,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->sensor_settings->drdy_irq.mask_ihl, 1);
if (err < 0)
return err;
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the falling edge or active low level\n");
}
break;
case IRQF_TRIGGER_RISING:
- dev_info(&indio_dev->dev,
- "interrupts on the rising edge\n");
+ dev_info(parent, "interrupts on the rising edge\n");
break;
case IRQF_TRIGGER_HIGH:
- dev_info(&indio_dev->dev,
- "interrupts active high level\n");
+ dev_info(parent, "interrupts active high level\n");
break;
default:
/* This is the most preferred mode, if possible */
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig);
irq_trig = IRQF_TRIGGER_RISING;
}
@@ -179,7 +177,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
if (irq_trig == IRQF_TRIGGER_FALLING ||
irq_trig == IRQF_TRIGGER_RISING) {
if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"edge IRQ not supported w/o stat register.\n");
return -EOPNOTSUPP;
}
@@ -214,13 +212,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig->name,
sdata->trig);
if (err) {
- dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
+ dev_err(parent, "failed to request trigger IRQ.\n");
return err;
}
err = devm_iio_trigger_register(parent, sdata->trig);
if (err < 0) {
- dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+ dev_err(parent, "failed to register iio trigger.\n");
return err;
}
indio_dev->trig = iio_trigger_get(sdata->trig);
---
base-commit: 7bac2c97af4078d7a627500c9bcdd5b033f97718
change-id: 20250522-st_iio_fix-1c58fdd4d420
Best regards,
--
Maud Spierings <maudspierings(a)gocontroll.com>
This patch series implements FIPS 140-3 compliance requirements for random
number generation in the Linux kernel 6.12. The changes ensure that when the
kernel is operating in FIPS mode, FIPS-compliant random number
generators are used instead of the default /dev/random implementation.
IMPORTANT: These two patches must be applied together as a series. Applying
only the first patch without the second will cause a deadlock during boot
in FIPS-enabled environments. The second patch fixes a critical timing issue
introduced by the first patch where the crypto RNG attempts to override the
drivers/char/random interface before the default RNG becomes available.
The series consists of two patches:
1. Initial implementation to override drivers/char/random in FIPS mode
2. Refinement to ensure override only occurs after FIPS-mode RNGs are available
These 2 patches are required for FIPS 140-3 certification
and compliance in government and enterprise environments.
Herbert Xu (1):
crypto: rng - Override drivers/char/random in FIPS mode
Jay Wang (1):
Override drivers/char/random only after FIPS-mode RNGs become
available
crypto/rng.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
--
2.47.1
The following commit has been merged into the ras/urgent branch of tip:
Commit-ID: 4c113a5b28bfd589e2010b5fc8867578b0135ed7
Gitweb: https://git.kernel.org/tip/4c113a5b28bfd589e2010b5fc8867578b0135ed7
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 24 Jun 2025 14:15:56
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Thu, 26 Jun 2025 17:28:13 +02:00
x86/mce: Don't remove sysfs if thresholding sysfs init fails
Currently, the MCE subsystem sysfs interface will be removed if the
thresholding sysfs interface fails to be created. A common failure is due to
new MCA bank types that are not recognized and don't have a short name set.
The MCA thresholding feature is optional and should not break the common MCE
sysfs interface. Also, new MCA bank types are occasionally introduced, and
updates will be needed to recognize them. But likewise, this should not break
the common sysfs interface.
Keep the MCE sysfs interface regardless of the status of the thresholding
sysfs interface.
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Tested-by: Tony Luck <tony.luck(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-1-236dd74f645f@amd.com
---
arch/x86/kernel/cpu/mce/core.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index e9b3c5d..07d6193 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2801,15 +2801,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- int ret;
mce_device_create(cpu);
-
- ret = mce_threshold_create_device(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return ret;
- }
+ mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;
The following commit has been merged into the ras/urgent branch of tip:
Commit-ID: 00c092de6f28ebd32208aef83b02d61af2229b60
Gitweb: https://git.kernel.org/tip/00c092de6f28ebd32208aef83b02d61af2229b60
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 24 Jun 2025 14:15:57
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 27 Jun 2025 12:41:44 +02:00
x86/mce: Ensure user polling settings are honored when restarting timer
Users can disable MCA polling by setting the "ignore_ce" parameter or by
setting "check_interval=0". This tells the kernel to *not* start the MCE
timer on a CPU.
If the user did not disable CMCI, then storms can occur. When these
happen, the MCE timer will be started with a fixed interval. After the
storm subsides, the timer's next interval is set to check_interval.
This disregards the user's input through "ignore_ce" and
"check_interval". Furthermore, if "check_interval=0", then the new timer
will run faster than expected.
Create a new helper to check these conditions and use it when a CMCI
storm ends.
[ bp: Massage. ]
Fixes: 7eae17c4add5 ("x86/mce: Add per-bank CMCI storm mitigation")
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-2-236dd74f645f@amd.com
---
arch/x86/kernel/cpu/mce/core.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 07d6193..4da4eab 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)
void (*mc_poll_banks)(void) = mc_poll_banks_default;
+static bool should_enable_timer(unsigned long iv)
+{
+ return !mca_cfg.ignore_ce && iv;
+}
+
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)
if (mce_get_storm_mode()) {
__start_timer(t, HZ);
- } else {
+ } else if (should_enable_timer(iv)) {
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
@@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
- if (mca_cfg.ignore_ce || !iv)
- return;
-
- this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (should_enable_timer(iv)) {
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+ }
}
static void __mcheck_cpu_setup_timer(void)
The following commit has been merged into the ras/urgent branch of tip:
Commit-ID: d66e1e90b16055d2f0ee76e5384e3f119c3c2773
Gitweb: https://git.kernel.org/tip/d66e1e90b16055d2f0ee76e5384e3f119c3c2773
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 24 Jun 2025 14:15:58
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 27 Jun 2025 13:13:36 +02:00
x86/mce/amd: Add default names for MCA banks and blocks
Ensure that sysfs init doesn't fail for new/unrecognized bank types or if
a bank has additional blocks available.
Most MCA banks have a single thresholding block, so the block takes the same
name as the bank.
Unified Memory Controllers (UMCs) are a special case where there are two
blocks and each has a unique name.
However, the microarchitecture allows for five blocks. Any new MCA bank types
with more than one block will be missing names for the extra blocks. The MCE
sysfs will fail to initialize in this case.
Fixes: 87a6d4091bd7 ("x86/mce/AMD: Update sysfs bank names for SMCA systems")
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-3-236dd74f645f@amd.com
---
arch/x86/kernel/cpu/mce/amd.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9d852c3..6820ebc 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1113,13 +1113,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
- if (bank_type >= N_SMCA_BANK_TYPES)
- return NULL;
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
- return NULL;
+ }
+
+ if (b && b->block) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
+ return buf_mcatype;
+ }
+
+ if (bank_type >= N_SMCA_BANK_TYPES) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
+ return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
The following commit has been merged into the ras/urgent branch of tip:
Commit-ID: 5f6e3b720694ad771911f637a51930f511427ce1
Gitweb: https://git.kernel.org/tip/5f6e3b720694ad771911f637a51930f511427ce1
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 24 Jun 2025 14:15:59
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 27 Jun 2025 13:16:23 +02:00
x86/mce/amd: Fix threshold limit reset
The MCA threshold limit must be reset after servicing the interrupt.
Currently, the restart function doesn't have an explicit check for this. It
makes some assumptions based on the current limit and what's in the registers.
These assumptions don't always hold, so the limit won't be reset in some
cases.
Make the reset condition explicit. Either an interrupt/overflow has occurred
or the bank is being initialized.
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-4-236dd74f645f@amd.com
---
arch/x86/kernel/cpu/mce/amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 6820ebc..5c4eb28 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
- int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
- tr->reset = 1; /* limit cannot be lower than err count */
-
- if (tr->reset) { /* reset err count and overflow bit */
- hi =
- (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - tr->b->threshold_limit);
+ /*
+ * Reset error count and overflow bit.
+ * This is done during init or after handling an interrupt.
+ */
+ if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
+ hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
+ hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
From: Lance Yang <lance.yang(a)linux.dev>
As pointed out by David[1], the batched unmap logic in try_to_unmap_one()
can read past the end of a PTE table if a large folio is mapped starting at
the last entry of that table. It would be quite rare in practice, as
MADV_FREE typically splits the large folio ;)
So let's fix the potential out-of-bounds read by refactoring the logic into
a new helper, folio_unmap_pte_batch().
The new helper now correctly calculates the safe number of pages to scan by
limiting the operation to the boundaries of the current VMA and the PTE
table.
In addition, the "all-or-nothing" batching restriction is removed to
support partial batches. The reference counting is also cleaned up to use
folio_put_refs().
[1] https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redha…
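To illustrate the boundary computation, here is a userspace sketch;
pmd_addr_end() is a simplified stand-in for the kernel helper and the
addresses are made up. A folio mapped starting at the last PTE slot of a
table may have only one PTE scanned, no matter how large the folio is:

#include <assert.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21			/* 512 PTEs per table on x86-64 */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

/* simplified stand-in for the kernel's pmd_addr_end() */
static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;
	return boundary < end ? boundary : end;
}

int main(void)
{
	unsigned long vm_end = 0x7f0000400000UL;
	/* last PTE slot of a table: one page below a 2 MiB boundary */
	unsigned long addr = 0x7f0000200000UL - (1UL << PAGE_SHIFT);
	unsigned int max_nr = (pmd_addr_end(addr, vm_end) - addr) >> PAGE_SHIFT;

	assert(max_nr == 1);	/* batch is clamped to the table boundary */
	return 0;
}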
Fixes: 354dffd29575 ("mm: support batched unmap for lazyfree large folios during reclamation")
Cc: <stable(a)vger.kernel.org>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Suggested-by: Barry Song <baohua(a)kernel.org>
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
---
v1 -> v2:
- Update subject and changelog (per Barry)
- https://lore.kernel.org/linux-mm/20250627025214.30887-1-lance.yang@linux.dev
mm/rmap.c | 46 ++++++++++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index fb63d9256f09..1320b88fab74 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
#endif
}
-/* We support batch unmapping of PTEs for lazyfree large folios */
-static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
- struct folio *folio, pte_t *ptep)
+static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
+ struct page_vma_mapped_walk *pvmw,
+ enum ttu_flags flags, pte_t pte)
{
const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
- int max_nr = folio_nr_pages(folio);
- pte_t pte = ptep_get(ptep);
+ unsigned long end_addr, addr = pvmw->address;
+ struct vm_area_struct *vma = pvmw->vma;
+ unsigned int max_nr;
+
+ if (flags & TTU_HWPOISON)
+ return 1;
+ if (!folio_test_large(folio))
+ return 1;
+ /* We may only batch within a single VMA and a single page table. */
+ end_addr = pmd_addr_end(addr, vma->vm_end);
+ max_nr = (end_addr - addr) >> PAGE_SHIFT;
+
+ /* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
- return false;
+ return 1;
if (pte_unused(pte))
- return false;
- if (pte_pfn(pte) != folio_pfn(folio))
- return false;
+ return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
- NULL, NULL) == max_nr;
+ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags,
+ NULL, NULL, NULL);
}
/*
@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_dirty(pteval))
folio_mark_dirty(folio);
} else if (likely(pte_present(pteval))) {
- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
- can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
- nr_pages = folio_nr_pages(folio);
+ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
end_addr = address + nr_pages * PAGE_SIZE;
flush_cache_range(vma, address, end_addr);
@@ -2206,13 +2213,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
hugetlb_remove_rmap(folio);
} else {
folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
- folio_ref_sub(folio, nr_pages - 1);
}
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
- folio_put(folio);
- /* We have already batched the entire folio */
- if (nr_pages > 1)
+ folio_put_refs(folio, nr_pages);
+
+ /*
+ * If we are sure that we batched the entire folio and cleared
+ * all PTEs, we can just optimize and stop right here.
+ */
+ if (nr_pages == folio_nr_pages(folio))
goto walk_done;
continue;
walk_abort:
--
2.49.0
The patch titled
Subject: mm/shmem, swap: improve cached mTHP handling and fix potential hung
has been added to the -mm mm-new branch. Its filename is
mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kairui Song <kasong(a)tencent.com>
Subject: mm/shmem, swap: improve cached mTHP handling and fix potential hung
Date: Fri, 27 Jun 2025 14:20:14 +0800
Patch series "mm/shmem, swap: bugfix and improvement of mTHP swap in", v3.
The current mTHP swapin path has some problems. It may potentially hang,
may cause redundant faults due to a false-positive swap cache lookup, and
it involves at least 4 Xarray tree walks (get order, get order again,
confirm swap, insert folio). And for !CONFIG_TRANSPARENT_HUGEPAGE builds,
it performs some mTHP-related checks.
This series fixes all of the mentioned issues, and the code should be more
robust and prepared for the swap table series. Tree walks are now reduced
to two (get order & confirm, insert folio), with more sanity checks and
comments added. The !CONFIG_TRANSPARENT_HUGEPAGE build overhead is also
minimized, and comes with a sanity check now.
The performance is slightly better after this series: sequential swap-in
of 24G of data from ZRAM, using transparent_hugepage_tmpfs=always (24
samples each):
Before: 11.17, Standard Deviation: 0.02
After patch 1: 10.89, Standard Deviation: 0.05
After patch 2: 10.84, Standard Deviation: 0.03
After patch 3: 10.91, Standard Deviation: 0.03
After patch 4: 10.86, Standard Deviation: 0.03
After patch 5: 10.07, Standard Deviation: 0.04
After patch 7: 10.09, Standard Deviation: 0.03
Each patch improves the performance a little; in total it is about ~10%
faster.
A kernel build test showed a very slight improvement, testing with make
-j24 with defconfig in a 256M memcg, also using ZRAM as swap, and
transparent_hugepage_tmpfs=always (6 test runs):
Before: system time avg: 3911.80s
After: system time avg: 3863.76s
This patch (of 7):
The current swap-in code assumes that, when a swap entry in a shmem
mapping is order 0, its cached folios (if present) must be order 0 too,
which turns out to be not always correct.
The problem is that shmem_split_large_entry() is called before verifying
that the folio will eventually be swapped in; one possible race is:
CPU1                                 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
                                     shmem_swapin_folio
                                     /* S1 is swapped in */
                                     shmem_writeout
                                     /* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0,
while folio lookup returns a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check
will then always be true, so swap-in keeps returning -EEXIST.
This looks fragile, so fix it up by allowing a larger folio in the
swap cache to be seen, and by checking that the whole shmem mapping
range covered by the swapin holds the expected swap entries upon
inserting the folio. Also drop the redundant tree walks before the
insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
Worth noting, it may seem that the order and value check before
inserting would help reduce the lock contention, but that is not true.
The swap cache layer ensures a raced swapin will either see a swap
cache folio or fail to do a swapin (we have the SWAP_HAS_CACHE bit even
if the swap cache is bypassed), so holding the folio lock and checking
the folio flag is already good enough for avoiding the lock contention.
The chance that a folio passes the swap entry value check but the shmem
mapping slot has changed should be very low.
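To make the insertion-time check concrete, here is a minimal userspace
model of the validation loop added to shmem_add_to_page_cache(); the
slot struct, range_matches() and the order encoding are illustrative
stand-ins for the XArray API, not kernel code:

#include <stdbool.h>
#include <stddef.h>

/* A stand-in for one XArray entry: a swap value plus its folio order. */
struct slot { unsigned long swp_val; unsigned int order; };

/*
 * Mirror of the patched loop: every conflicting entry must hold the
 * next expected swap value (no holes, no foreign entries), and after
 * walking all entries the iterator must sit exactly nr pages past the
 * starting value, proving the range is fully and correctly covered.
 */
static bool range_matches(const struct slot *slots, size_t n,
                          unsigned long expected, unsigned long nr)
{
        unsigned long iter = expected;

        for (size_t i = 0; i < n; i++) {
                if (slots[i].swp_val != iter)
                        return false;
                iter += 1UL << slots[i].order;  /* skip the whole large entry */
        }
        return iter - nr == expected;
}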
Link: https://lkml.kernel.org/r/20250627062020.534-2-ryncsn@gmail.com
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
--- a/mm/shmem.c~mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung
+++ a/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struc
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struc
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct ino
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct ino
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
_
Patches currently in -mm which might be from kasong(a)tencent.com are
mm-list_lru-refactor-the-locking-code.patch
mm-shmem-swap-improve-cached-mthp-handling-and-fix-potential-hung.patch
mm-shmem-swap-avoid-redundant-xarray-lookup-during-swapin.patch
mm-shmem-swap-tidy-up-thp-swapin-checks.patch
mm-shmem-swap-clean-up-swap-entry-splitting.patch
mm-shmem-swap-never-use-swap-cache-and-readahead-for-swp_synchronous_io.patch
mm-shmem-swap-fix-major-fault-counting.patch
mm-shmem-swap-avoid-false-positive-swap-cache-lookup.patch
The patch titled
Subject: mm/rmap: fix potential out-of-bounds page table access during batched unmap
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lance Yang <lance.yang(a)linux.dev>
Subject: mm/rmap: fix potential out-of-bounds page table access during batched unmap
Date: Fri, 27 Jun 2025 14:23:19 +0800
As pointed out by David[1], the batched unmap logic in try_to_unmap_one()
can read past the end of a PTE table if a large folio is mapped starting
at the last entry of that table. It would be quite rare in practice, as
MADV_FREE typically splits the large folio ;)
So let's fix the potential out-of-bounds read by refactoring the logic
into a new helper, folio_unmap_pte_batch().
The new helper now correctly calculates the safe number of pages to scan
by limiting the operation to the boundaries of the current VMA and the PTE
table.
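As a rough illustration of that clamping (the constants below are
typical x86-64 values chosen for the example, not taken from the
patch):

#define EXAMPLE_PAGE_SHIFT 12
#define EXAMPLE_PMD_SIZE   (1UL << 21)  /* one PTE table covers 2 MiB */
#define EXAMPLE_PMD_MASK   (~(EXAMPLE_PMD_SIZE - 1))

/*
 * Same arithmetic as the pmd_addr_end() use in the helper: the batch
 * may not run past the end of the current PTE table nor past the end
 * of the VMA, so the page count comes from whichever boundary is
 * nearer.
 */
static unsigned int max_batch_pages(unsigned long addr, unsigned long vma_end)
{
        unsigned long pmd_end = (addr & EXAMPLE_PMD_MASK) + EXAMPLE_PMD_SIZE;
        unsigned long end = pmd_end < vma_end ? pmd_end : vma_end;

        return (end - addr) >> EXAMPLE_PAGE_SHIFT;
}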
In addition, the "all-or-nothing" batching restriction is removed to
support partial batches. The reference counting is also cleaned up to use
folio_put_refs().
[1] https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redha…
Link: https://lkml.kernel.org/r/20250627062319.84936-1-lance.yang@linux.dev
Fixes: 354dffd29575 ("mm: support batched unmap for lazyfree large folios during reclamation")
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Suggested-by: Barry Song <baohua(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <v-songbaohua(a)oppo.com>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: "Huang, Ying" <huang.ying.caritas(a)gmail.com>
Cc: Kairui Song <kasong(a)tencent.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mingzhe Yang <mingzhe.yang(a)ly.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Tangquan Zheng <zhengtangquan(a)oppo.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/rmap.c | 46 ++++++++++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 18 deletions(-)
--- a/mm/rmap.c~mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap
+++ a/mm/rmap.c
@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio
#endif
}
-/* We support batch unmapping of PTEs for lazyfree large folios */
-static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
- struct folio *folio, pte_t *ptep)
+static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
+ struct page_vma_mapped_walk *pvmw,
+ enum ttu_flags flags, pte_t pte)
{
const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
- int max_nr = folio_nr_pages(folio);
- pte_t pte = ptep_get(ptep);
+ unsigned long end_addr, addr = pvmw->address;
+ struct vm_area_struct *vma = pvmw->vma;
+ unsigned int max_nr;
+
+ if (flags & TTU_HWPOISON)
+ return 1;
+ if (!folio_test_large(folio))
+ return 1;
+
+ /* We may only batch within a single VMA and a single page table. */
+ end_addr = pmd_addr_end(addr, vma->vm_end);
+ max_nr = (end_addr - addr) >> PAGE_SHIFT;
+ /* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
- return false;
+ return 1;
if (pte_unused(pte))
- return false;
- if (pte_pfn(pte) != folio_pfn(folio))
- return false;
+ return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
- NULL, NULL) == max_nr;
+ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags,
+ NULL, NULL, NULL);
}
/*
@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct foli
if (pte_dirty(pteval))
folio_mark_dirty(folio);
} else if (likely(pte_present(pteval))) {
- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
- can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
- nr_pages = folio_nr_pages(folio);
+ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
end_addr = address + nr_pages * PAGE_SIZE;
flush_cache_range(vma, address, end_addr);
@@ -2206,13 +2213,16 @@ discard:
hugetlb_remove_rmap(folio);
} else {
folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
- folio_ref_sub(folio, nr_pages - 1);
}
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
- folio_put(folio);
- /* We have already batched the entire folio */
- if (nr_pages > 1)
+ folio_put_refs(folio, nr_pages);
+
+ /*
+ * If we are sure that we batched the entire folio and cleared
+ * all PTEs, we can just optimize and stop right here.
+ */
+ if (nr_pages == folio_nr_pages(folio))
goto walk_done;
continue;
walk_abort:
_
Patches currently in -mm which might be from lance.yang(a)linux.dev are
mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch
The "is_waiting" flag was updated after calling complete(), which could
lead to a race where the waiting thread wakes up before the flag is
cleared, may cause a missed wakeup or stale state check.
Reorder the operations to update "is_waiting" before signaling completion
to ensure consistent state.
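A minimal sketch of the corrected ordering, with the waiter side shown
for context (the waiter fragment is illustrative, not part of the
patch):

/* Completer: publish the state change before releasing the waiter. */
cmdptr->is_waiting = 0;
complete(&cmdptr->done);

/*
 * Waiter (illustrative): once complete() fires, this thread may run
 * immediately and act on cmdptr, so any completer store issued after
 * complete() would race with it.
 */
wait_for_completion(&cmdptr->done);
/* ... safe to inspect or recycle cmdptr from here on ... */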
Fixes: 824a156633df ("scsi: mpi3mr: Base driver code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chandrakanth Patil <chandrakanth.patil(a)broadcom.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar(a)broadcom.com>
---
drivers/scsi/mpi3mr/mpi3mr_fw.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 1d7901a8f0e4..0186676698d4 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -428,8 +428,8 @@ static void mpi3mr_process_admin_reply_desc(struct mpi3mr_ioc *mrioc,
MPI3MR_SENSE_BUF_SZ);
}
if (cmdptr->is_waiting) {
- complete(&cmdptr->done);
cmdptr->is_waiting = 0;
+ complete(&cmdptr->done);
} else if (cmdptr->callback)
cmdptr->callback(mrioc, cmdptr);
}
--
2.31.1
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0ec33c81d9c7342f03864101ddb2e717a0cce03e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062018-ahead-armory-59ac@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0ec33c81d9c7342f03864101ddb2e717a0cce03e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Tue, 27 May 2025 18:07:33 +0100
Subject: [PATCH] io_uring/zcrx: fix area release on registration failure
On area registration failure there might be no ifq set and it's not safe
to access area->ifq in the release path without checking it first.
Cc: stable(a)vger.kernel.org
Fixes: f12ecf5e1c5ec ("io_uring/zcrx: fix late dma unmap for a dead dev")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/bc02878678a5fec28bc77d33355cdba735418484.17483656…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0c5b7d8f8d67..21c816c3bfe0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -366,7 +366,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_area *area)
{
- io_zcrx_unmap_area(area->ifq, area);
+ if (area->ifq)
+ io_zcrx_unmap_area(area->ifq, area);
io_release_area_mem(&area->mem);
kvfree(area->freelist);
Flush dbc requests when dbc is stopped and transfer rings are freed.
Failure to flush them leads to leaked memory and dbc completing odd
requests after resuming from suspend, leading to error messages such as:
[ 95.344392] xhci_hcd 0000:00:0d.0: no matched request
Cc: stable(a)vger.kernel.org
Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver")
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-dbgcap.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index 0d4ce5734165..06a2edb9e86e 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -652,6 +652,10 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc)
case DS_DISABLED:
return;
case DS_CONFIGURED:
+ spin_lock(&dbc->lock);
+ xhci_dbc_flush_requests(dbc);
+ spin_unlock(&dbc->lock);
+
if (dbc->driver->disconnect)
dbc->driver->disconnect(dbc);
break;
--
2.43.0
From: Łukasz Bartosik <ukaszb(a)chromium.org>
When the /dev/ttyDBC0 device is created, the ECHO flag is set for the
terminal device by default. However, if data arrives from a peer before
the application using /dev/ttyDBC0 applies its own set of terminal
flags, the arriving data will be echoed, which might not be the desired
behavior.
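For reference, a sketch of the userspace equivalent an application
would otherwise have to perform first; the patch simply makes no-echo
the driver's default, closing the window before the application runs
(fd is a hypothetical descriptor open on /dev/ttyDBC0):

#include <termios.h>

struct termios tio;

tcgetattr(fd, &tio);
tio.c_lflag &= ~ECHO;   /* stop echoing arriving peer data */
tcsetattr(fd, TCSANOW, &tio);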
Fixes: 4521f1613940 ("xhci: dbctty: split dbc tty driver registration and unregistration functions.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Łukasz Bartosik <ukaszb(a)chromium.org>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-dbgtty.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 60ed753c85bb..d894081d8d15 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -617,6 +617,7 @@ int dbc_tty_init(void)
dbc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
dbc_tty_driver->subtype = SERIAL_TYPE_NORMAL;
dbc_tty_driver->init_termios = tty_std_termios;
+ dbc_tty_driver->init_termios.c_lflag &= ~ECHO;
dbc_tty_driver->init_termios.c_cflag =
B9600 | CS8 | CREAD | HUPCL | CLOCAL;
dbc_tty_driver->init_termios.c_ispeed = 9600;
--
2.43.0
From: Raju Rangoju <Raju.Rangoju(a)amd.com>
During High-Speed isochronous audio transfers, the xHCI controller on
certain AMD platforms experiences momentary data loss. This results in
Missed Service Errors (MSE) being generated by the xHCI.
The root cause of the MSE is attributed to the ISOC OUT endpoint
being omitted from scheduling. This can happen when an IN
endpoint with a 64ms service interval either is pre-scheduled
prior to the ISOC OUT endpoint or the interval of the ISOC OUT
endpoint is shorter than that of the IN endpoint. Consequently,
the OUT service is neglected when an IN endpoint with a service
interval exceeding 32ms is scheduled concurrently (every 64ms in
this scenario).
This issue is particularly seen on certain older AMD platforms.
To mitigate this problem, it is recommended to adjust the service
interval of the IN endpoint to not exceed 32ms (interval 8). This
adjustment ensures that the OUT endpoint will not be bypassed,
even if a smaller interval value is utilized.
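For context, the xHCI endpoint context Interval field encodes the
service period as 2^interval * 125 us, so the clamp works out as below
(a sketch of the arithmetic, not driver code):

/*
 * interval 9 -> 512 * 125 us = 64 ms (the problematic case)
 * interval 8 -> 256 * 125 us = 32 ms (the quirk's upper bound)
 */
static unsigned int xhci_interval_to_us(unsigned int interval)
{
        return (1U << interval) * 125;
}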
Cc: stable(a)vger.kernel.org
Signed-off-by: Raju Rangoju <Raju.Rangoju(a)amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-mem.c | 4 ++++
drivers/usb/host/xhci-pci.c | 25 +++++++++++++++++++++++++
drivers/usb/host/xhci.h | 1 +
3 files changed, 30 insertions(+)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index bd745a0f2f78..6680afa4f596 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
/* Periodic endpoint bInterval limit quirk */
if (usb_endpoint_xfer_int(&ep->desc) ||
usb_endpoint_xfer_isoc(&ep->desc)) {
+ if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) &&
+ interval >= 9) {
+ interval = 8;
+ }
if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) &&
udev->speed >= USB_SPEED_HIGH &&
interval >= 7) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 0c481cbc8f08..00fac8b233d2 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -71,12 +71,22 @@
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee
+#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5
+#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0
+#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1
+#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5
#define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
+#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316
+
#define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042
#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
#define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242
@@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI))
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
+ if (pdev->vendor == PCI_VENDOR_ID_ATI &&
+ pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI)
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
xhci->quirks |= XHCI_AMD_0x96_HOST;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index fa1ea0e0c7fb..a20f4e7cd43a 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1643,6 +1643,7 @@ struct xhci_hcd {
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
#define XHCI_ETRON_HOST BIT_ULL(49)
+#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.43.0
On Mon, Jun 02, 2025 at 02:44:33AM +0000, Parav Pandit wrote:
> When the PCI device is surprise removed, requests may not be completed
> by the device as the VQ is marked as broken. Due to this, the disk
> deletion hangs.
There are loops in the core virtio driver code that expect device
register reads to eventually return 0:
drivers/virtio/virtio_pci_modern.c:vp_reset()
drivers/virtio/virtio_pci_modern_dev.c:vp_modern_set_queue_reset()
Is there a hang if these loops are hit when a device has been surprise
removed? I'm trying to understand whether surprise removal is fully
supported or whether this patch is one step in that direction.
Apart from that, I'm happy with the virtio_blk.c aspects of the patch:
Reviewed-by: Stefan Hajnoczi <stefanha(a)redhat.com>
>
> Fix it by aborting the requests when the VQ is broken.
>
> With this fix, fio now completes swiftly.
> An alternative of an IO timeout has been considered; however, when the
> driver knows about an unresponsive block device, swiftly clearing the
> requests enables users and upper layers to react quickly.
>
> Verified with multiple device unplug iterations with pending requests in
> virtio used ring and some pending with the device.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci device")
> Cc: stable(a)vger.kernel.org
> Reported-by: Li RongQing <lirongqing(a)baidu.com>
> Closes: https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741@bai…
> Reviewed-by: Max Gurtovoy <mgurtovoy(a)nvidia.com>
> Reviewed-by: Israel Rukshin <israelr(a)nvidia.com>
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
>
> ---
> v4->v5:
> - fixed comment style where comment to start with one empty line at start
> - Addressed comments from Alok
> - fixed typo in broken vq check
> v3->v4:
> - Addressed comments from Michael
> - renamed virtblk_request_cancel() to
> virtblk_complete_request_with_ioerr()
> - Added comments for virtblk_complete_request_with_ioerr()
> - Renamed virtblk_broken_device_cleanup() to
> virtblk_cleanup_broken_device()
> - Added comments for virtblk_cleanup_broken_device()
> - Moved the broken vq check in virtblk_remove()
> - Fixed comment style to have first empty line
> - replaced freezed to frozen
> - Fixed comments rephrased
>
> v2->v3:
> - Addressed comments from Michael
> - updated comment for synchronizing with callbacks
>
> v1->v2:
> - Addressed comments from Stephan
> - fixed spelling to 'waiting'
> - Addressed comments from Michael
> - Dropped checking broken vq from queue_rq() and queue_rqs()
> because it is checked in lower layer routines in virtio core
>
> v0->v1:
> - Fixed comments from Stefan to rename a cleanup function
> - Improved logic for handling any outstanding requests
> in bio layer
> - improved cancel callback to sync with ongoing done()
> ---
> drivers/block/virtio_blk.c | 95 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 95 insertions(+)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 7cffea01d868..c5e383c0ac48 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -1554,6 +1554,98 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +/*
> + * If the vq is broken, device will not complete requests.
> + * So we do it for the device.
> + */
> +static bool virtblk_complete_request_with_ioerr(struct request *rq, void *data)
> +{
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> + struct virtio_blk *vblk = data;
> + struct virtio_blk_vq *vq;
> + unsigned long flags;
> +
> + vq = &vblk->vqs[rq->mq_hctx->queue_num];
> +
> + spin_lock_irqsave(&vq->lock, flags);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + spin_unlock_irqrestore(&vq->lock, flags);
> + return true;
> +}
> +
> +/*
> + * If the device is broken, it will not use any buffers and waiting
> + * for that to happen is pointless. We'll do the cleanup in the driver,
> + * completing all requests for the device.
> + */
> +static void virtblk_cleanup_broken_device(struct virtio_blk *vblk)
> +{
> + struct request_queue *q = vblk->disk->queue;
> +
> + /*
> + * Start freezing the queue, so that new requests keeps waiting at the
> + * door of bio_queue_enter(). We cannot fully freeze the queue because
> + * frozen queue is an empty queue and there are pending requests, so
> + * only start freezing it.
> + */
> + blk_freeze_queue_start(q);
> +
> + /*
> + * When quiescing completes, all ongoing dispatches have completed
> + * and no new dispatch will happen towards the driver.
> + */
> + blk_mq_quiesce_queue(q);
> +
> + /*
> + * Synchronize with any ongoing VQ callbacks that may have started
> + * before the VQs were marked as broken. Any outstanding requests
> + * will be completed by virtblk_complete_request_with_ioerr().
> + */
> + virtio_synchronize_cbs(vblk->vdev);
> +
> + /*
> + * At this point, no new requests can enter the queue_rq() and
> + * completion routine will not complete any new requests either for the
> + * broken vq. Hence, it is safe to cancel all requests which are
> + * started.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set,
> + virtblk_complete_request_with_ioerr, vblk);
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /*
> + * All pending requests are cleaned up. Time to resume so that disk
> + * deletion can be smooth. Start the HW queues so that when queue is
> + * unquiesced requests can again enter the driver.
> + */
> + blk_mq_start_stopped_hw_queues(q, true);
> +
> + /*
> + * Unquiescing will trigger dispatching any pending requests to the
> + * driver which has crossed bio_queue_enter() to the driver.
> + */
> + blk_mq_unquiesce_queue(q);
> +
> + /*
> + * Wait for all pending dispatches to terminate which may have been
> + * initiated after unquiescing.
> + */
> + blk_mq_freeze_queue_wait(q);
> +
> + /*
> + * Mark the disk dead so that once we unfreeze the queue, requests
> + * waiting at the door of bio_queue_enter() can be aborted right away.
> + */
> + blk_mark_disk_dead(vblk->disk);
> +
> + /* Unfreeze the queue so that any waiting requests will be aborted. */
> + blk_mq_unfreeze_queue_nomemrestore(q);
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev)
> {
> struct virtio_blk *vblk = vdev->priv;
> @@ -1561,6 +1653,9 @@ static void virtblk_remove(struct virtio_device *vdev)
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
>
> + if (virtqueue_is_broken(vblk->vqs[0].vq))
> + virtblk_cleanup_broken_device(vblk);
> +
> del_gendisk(vblk->disk);
> blk_mq_free_tag_set(&vblk->tag_set);
>
> --
> 2.34.1
>
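Condensed, the teardown sequence the quoted patch implements is (a
summary of the calls above, not a drop-in function):

blk_freeze_queue_start(q);          /* new I/O parks at bio_queue_enter() */
blk_mq_quiesce_queue(q);            /* ongoing dispatches complete */
virtio_synchronize_cbs(vblk->vdev); /* sync with late VQ callbacks */
blk_mq_tagset_busy_iter(&vblk->tag_set,
                        virtblk_complete_request_with_ioerr, vblk);
blk_mq_tagset_wait_completed_request(&vblk->tag_set);
blk_mq_start_stopped_hw_queues(q, true);
blk_mq_unquiesce_queue(q);          /* flush requests already past the door */
blk_mq_freeze_queue_wait(q);
blk_mark_disk_dead(vblk->disk);     /* waiters abort once unfrozen */
blk_mq_unfreeze_queue_nomemrestore(q);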
This patch series aims at adding support for Exynos7870's DECON in the
Exynos7 DECON driver. It introduces a driver data struct so that support
for DECON on other SoCs can be added to it in the future.
It also fixes a few bugs in the driver, such as functions receiving bad
pointers.
Tested on Samsung Galaxy J7 Prime (samsung-on7xelte), Samsung Galaxy A2
Core (samsung-a2corelte), and Samsung Galaxy J6 (samsung-j6lte).
Signed-off-by: Kaustabh Chakraborty <kauschluss(a)disroot.org>
---
Changes in v3:
- Add a new commit documenting iommus and ports dt properties.
- Link to v2: https://lore.kernel.org/r/20250612-exynosdrm-decon-v2-0-d6c1d21c8057@disroo…
Changes in v2:
- Add a new commit to prevent an occasional panic under circumstances.
- Rewrite and redo [v1 2/6] to be a more sensible commit.
- Link to v1: https://lore.kernel.org/r/20240919-exynosdrm-decon-v1-0-6c5861c1cb04@disroo…
---
Kaustabh Chakraborty (3):
dt-bindings: display: samsung,exynos7-decon: add properties for iommus and ports
drm/exynos: exynos7_drm_decon: fix call of decon_commit()
drm/exynos: exynos7_drm_decon: add vblank check in IRQ handling
.../bindings/display/samsung/samsung,exynos7-decon.yaml | 8 ++++++++
drivers/gpu/drm/exynos/exynos7_drm_decon.c | 8 ++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
---
base-commit: 1b152eeca84a02bdb648f16b82ef3394007a9dcf
change-id: 20240917-exynosdrm-decon-4c228dd1d2bf
Best regards,
--
Kaustabh Chakraborty <kauschluss(a)disroot.org>
On 11 Oct 2022, it was reported that the crc32 verification
of the u-boot environment failed only on big-endian systems
for the u-boot-env nvmem layout driver with the following error.
Invalid calculated CRC32: 0x88cd6f09 (expected: 0x096fcd88)
This problem has been present since the driver was introduced,
and before it was made into a layout driver.
The suggested fix at the time was to use further endianness conversion
macros so that both the stored and calculated crc32 values being
compared are always represented in the system's endianness.
This was not accepted due to sparse warnings
and some disagreement on how to handle the situation.
Later on in a newer revision of the patch, it was proposed to use
cpu_to_le32() for both values to compare instead of le32_to_cpu()
and store the values as __le32 type to remove compilation errors.
The necessity of this is based on the assumption that the use of
crc32() requires endianness conversion because the algorithm uses
little-endian; however, this proves not to be the case,
and the issue is unrelated.
Upon inspecting the current kernel code,
there is already an existing use of le32_to_cpu() in this driver,
which suggests there is already special handling for big-endian systems;
however, it is big-endian systems that have the problem.
This being the only functional difference between architectures
in the driver, combined with the fact that the suggested fix
was to apply the exact same endianness conversion to both values,
raises the possibility that it was not necessary to begin with,
as applying the same endianness conversion to two values
expected to be equal is equivalent to no conversion at all.
After inspecting the u-boot environment of devices of both endianness
and trying to remove the existing endianness conversion,
the problem is resolved in a way equivalent to the other suggested fixes.
Ultimately, it seems that u-boot is agnostic to endianness
at least for the purpose of environment variables.
In other words, u-boot reads and writes the stored crc32 value
with the same endianness that the crc32 value is calculated with
in whichever endianness a certain architecture runs on.
Therefore, the u-boot-env driver does not need to convert endianness.
Remove the usage of endianness macros in the u-boot-env driver,
and change the type of local variables to maintain the same return type.
If there is a special situation in the case of endianness,
it would be a corner case and should be handled by a unique "compatible".
Even though it is not necessary to use endianness conversion macros here,
it may be useful to use them in the future for consistent error printing.
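A small userspace model of the resulting check, using zlib's crc32()
as a stand-in for the kernel's (the helper name and the CRC-at-offset-0
layout are illustrative): because both the stored word and the computed
value are in the host's native byte order, the comparison holds on
little- and big-endian systems alike.

#include <stdint.h>
#include <string.h>
#include <zlib.h>

static int env_crc_ok(const uint8_t *env, size_t env_size,
                      size_t data_offset)
{
        uint32_t stored;

        /* Read the stored CRC as-is: U-Boot wrote it in native order. */
        memcpy(&stored, env, sizeof(stored));
        return stored == crc32(0, env + data_offset, env_size - data_offset);
}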
Fixes: d5542923f200 ("nvmem: add driver handling U-Boot environment variables")
Reported-by: INAGAKI Hiroshi <musashino.open(a)gmail.com>
Link: https://lore.kernel.org/all/20221011024928.1807-1-musashino.open@gmail.com
Cc: <stable(a)vger.kernel.org> # 6.12.x
Cc: <stable(a)vger.kernel.org> # 6.6.x: f4cf4e5: Revert "nvmem: add new config option"
Cc: <stable(a)vger.kernel.org> # 6.6.x: 7f38b70: of: device: Export of_device_make_bus_id()
Cc: <stable(a)vger.kernel.org> # 6.6.x: 4a1a402: nvmem: Move of_nvmem_layout_get_container() in another header
Cc: <stable(a)vger.kernel.org> # 6.6.x: fc29fd8: nvmem: core: Rework layouts to become regular devices
Cc: <stable(a)vger.kernel.org> # 6.6.x: 0331c61: nvmem: core: Expose cells through sysfs
Cc: <stable(a)vger.kernel.org> # 6.6.x: 401df0d: nvmem: layouts: refactor .add_cells() callback arguments
Cc: <stable(a)vger.kernel.org> # 6.6.x: 6d0ca4a: nvmem: layouts: store owner from modules with nvmem_layout_driver_register()
Cc: <stable(a)vger.kernel.org> # 6.6.x: 5f15811: nvmem: layouts: add U-Boot env layout
Cc: <stable(a)vger.kernel.org> # 6.6.x
Signed-off-by: Michael C. Pratt <mcpratt(a)pm.me>
---
drivers/nvmem/layouts/u-boot-env.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/nvmem/layouts/u-boot-env.c b/drivers/nvmem/layouts/u-boot-env.c
index 731e6f4f12b2..21f6dcf905dd 100644
--- a/drivers/nvmem/layouts/u-boot-env.c
+++ b/drivers/nvmem/layouts/u-boot-env.c
@@ -92,7 +92,7 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem,
size_t crc32_data_offset;
size_t crc32_data_len;
size_t crc32_offset;
- __le32 *crc32_addr;
+ uint32_t *crc32_addr;
size_t data_offset;
size_t data_len;
size_t dev_size;
@@ -143,8 +143,8 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem,
goto err_kfree;
}
- crc32_addr = (__le32 *)(buf + crc32_offset);
- crc32 = le32_to_cpu(*crc32_addr);
+ crc32_addr = (uint32_t *)(buf + crc32_offset);
+ crc32 = *crc32_addr;
crc32_data_len = dev_size - crc32_data_offset;
data_len = dev_size - data_offset;
base-commit: 38fec10eb60d687e30c8c6b5420d86e8149f7557
--
2.30.2
Properties num_cpu and feature are read-only once the eiointc device
is created; they are set with the KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL
attr group before device creation.
Attr group KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS is meant to update
register and software state for migration and reset usage; properties
num_cpu and feature cannot be updated again once the device has been
created.
Here, discard write operations on properties num_cpu and feature in
attr group KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS.
Cc: stable(a)vger.kernel.org
Fixes: 1ad7efa552fd ("LoongArch: KVM: Add EIOINTC user mode read and write functions")
Signed-off-by: Bibo Mao <maobibo(a)loongson.cn>
---
arch/loongarch/kvm/intc/eiointc.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index 0b648c56b0c3..b48511f903b5 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -910,9 +910,22 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev,
data = (void __user *)attr->addr;
switch (addr) {
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU:
+ /*
+ * Property num_cpu and feature is read-only once eiointc is
+ * created with KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL group API
+ *
+ * Disable writing with KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS
+ * group API
+ */
+ if (is_write)
+ return ret;
+
p = &s->num_cpu;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE:
+ if (is_write)
+ return ret;
+
p = &s->features;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE:
--
2.39.3
From: John Fastabend <john.fastabend(a)gmail.com>
[ Upstream commit a454d84ee20baf7bd7be90721b9821f73c7d23d9 ]
There is a race where skb's from the sk_psock_backlog can be referenced
after the userspace side has already consume_skb()'d the sk_buff and its
refcnt dropped to zero, causing a use after free.
The flow is the following:
while ((skb = skb_peek(&psock->ingress_skb))
sk_psock_handle_skb(psock, skb, ..., ingress)
if (!ingress) ...
sk_psock_skb_ingress
sk_psock_skb_ingress_enqueue(skb)
msg->skb = skb
sk_psock_queue_msg(psock, msg)
skb_dequeue(&psock->ingress_skb)
The sk_psock_queue_msg() puts the msg on the ingress_msg queue. This is
what the application reads when recvmsg() is called. An application can
read this anytime after the msg is placed on the queue. The recvmsg hook
will also read msg->skb and then after user space reads the msg will call
consume_skb(skb) on it effectively free'ing it.
But the race is above, where the backlog queue still has a reference
to the skb and calls skb_dequeue(). If the skb_dequeue() happens after
the user reads and frees the skb, we have a use after free.
The !ingress case does not suffer from this problem because it uses
sendmsg_*(sk, msg) which does not pass the sk_buff further down the
stack.
The following splat was observed with 'test_progs -t sockmap_listen':
[ 1022.710250][ T2556] general protection fault, ...
[...]
[ 1022.712830][ T2556] Workqueue: events sk_psock_backlog
[ 1022.713262][ T2556] RIP: 0010:skb_dequeue+0x4c/0x80
[ 1022.713653][ T2556] Code: ...
[...]
[ 1022.720699][ T2556] Call Trace:
[ 1022.720984][ T2556] <TASK>
[ 1022.721254][ T2556] ? die_addr+0x32/0x80^M
[ 1022.721589][ T2556] ? exc_general_protection+0x25a/0x4b0
[ 1022.722026][ T2556] ? asm_exc_general_protection+0x22/0x30
[ 1022.722489][ T2556] ? skb_dequeue+0x4c/0x80
[ 1022.722854][ T2556] sk_psock_backlog+0x27a/0x300
[ 1022.723243][ T2556] process_one_work+0x2a7/0x5b0
[ 1022.723633][ T2556] worker_thread+0x4f/0x3a0
[ 1022.723998][ T2556] ? __pfx_worker_thread+0x10/0x10
[ 1022.724386][ T2556] kthread+0xfd/0x130
[ 1022.724709][ T2556] ? __pfx_kthread+0x10/0x10
[ 1022.725066][ T2556] ret_from_fork+0x2d/0x50
[ 1022.725409][ T2556] ? __pfx_kthread+0x10/0x10
[ 1022.725799][ T2556] ret_from_fork_asm+0x1b/0x30
[ 1022.726201][ T2556] </TASK>
To fix this, we add an skb_get() before passing the skb to be enqueued
in the ingress queue. This bumps the skb->users refcnt so that
consume_skb() and kfree_skb() will not immediately free the sk_buff.
With this we can be sure the skb is still around when we do the
dequeue. Then we just need to decrement the refcnt, or free the skb in
the backlog case, which we do by calling kfree_skb() in the ingress
case as well as the sendmsg case.
Before the locking change from the commit in the Fixes tag, we had the
sock locked, so we couldn't race with userspace and there was no issue
here.
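Condensed, the fix follows the usual rule for handing one skb to two
queues: each holder owns a reference, so neither side can free it under
the other (this is a summary of the diff below, not additional code):

skb_get(skb);                   /* +1 ref for the ingress_msg queue */
err = sk_psock_skb_ingress(psock, skb, off, len);
if (err < 0)
        kfree_skb(skb);         /* enqueue failed: drop our reference */

/* later, in sk_psock_backlog(): */
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);                 /* drop the backlog's own reference */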
[ Backport to 5.15: context cleanly applied with no semantic changes.
Build-tested. ]
Fixes: 799aa7f98d53e ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Reported-by: Jiri Olsa <jolsa(a)kernel.org>
Signed-off-by: John Fastabend <john.fastabend(a)gmail.com>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Tested-by: Xu Kuohai <xukuohai(a)huawei.com>
Tested-by: Jiri Olsa <jolsa(a)kernel.org>
Link: https://lore.kernel.org/bpf/20230901202137.214666-1-john.fastabend@gmail.com
Signed-off-by: Pranav Tyagi <pranav.tyagi03(a)gmail.com>
---
net/core/skmsg.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a5947aa55983..a13ddb9976ad 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -608,12 +608,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
+ int err = 0;
+
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb, off, len);
+ skb_get(skb);
+ err = sk_psock_skb_ingress(psock, skb, off, len);
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -681,9 +687,7 @@ static void sk_psock_backlog(struct work_struct *work)
} while (len);
skb = skb_dequeue(&psock->ingress_skb);
- if (!ingress) {
- kfree_skb(skb);
- }
+ kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
--
2.49.0
When two masters share an IOMMU, calling ops->of_xlate during
the second master's driver init may overwrite iommu->domain set
by the first. This causes the check if (iommu->domain == domain)
in rk_iommu_attach_device() to fail, resulting in the same
iommu->node being added twice to &rk_domain->iommus, which can
lead to an infinite loop in subsequent &rk_domain->iommus operations.
Fixes: 25c2325575cc ("iommu/rockchip: Add missing set_platform_dma_ops callback")
Signed-off-by: Simon Xue <xxm(a)rock-chips.com>
Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
v2:
No functional changes.
---
drivers/iommu/rockchip-iommu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 22f74ba33a0e..e6bb3c784017 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev,
return -ENOMEM;
data->iommu = platform_get_drvdata(iommu_dev);
- data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
+ iommu->domain = &rk_identity_domain;
+
platform_set_drvdata(pdev, iommu);
iommu->dev = dev;
iommu->num_mmu = 0;
--
2.34.1
When using Secure TSC, the GUEST_TSC_FREQ MSR reports a frequency based on
the nominal P0 frequency, which deviates slightly (typically ~0.2%) from
the actual mean TSC frequency due to clocking parameters. Over extended VM
uptime, this discrepancy accumulates, causing clock skew between the
hypervisor and SEV-SNP VM, leading to early timer interrupts as perceived
by the guest.
The guest kernel relies on the reported nominal frequency for TSC-based
timekeeping, while the actual frequency set during SNP_LAUNCH_START may
differ. This mismatch results in inaccurate time calculations, causing the
guest to perceive hrtimers as firing earlier than expected.
Utilize the TSC_FACTOR from the SEV firmware's secrets page (see "Secrets
Page Format" in the SNP Firmware ABI Specification) to calculate the mean
TSC frequency, ensuring accurate timekeeping and mitigating clock skew in
SEV-SNP VMs.
Use early_memremap_encrypted() to map the secrets page, as
ioremap_encrypted() uses kmalloc(), which is not available during early
TSC initialization and causes a panic.
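To make the scaling concrete, a worked example with the macro the
patch introduces (the input numbers are invented for illustration):

#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - ((freq) * (factor)) / 100000)

/*
 * With a nominal GUEST_TSC_FREQ of 2500000 kHz and a TSC_FACTOR of 200
 * (a 0.2% decrease):
 *
 *   2500000 - (2500000 * 200) / 100000 = 2500000 - 5000 = 2495000 kHz
 */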
Fixes: 73bbf3b0fbba ("x86/tsc: Init the TSC for Secure TSC guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikunj A Dadhania <nikunj(a)amd.com>
---
v2:
* Move the SNP TSC scaling constant to the header (Dionna)
* Drop the unsigned long cast and add in securetsc_get_tsc_khz (Tom)
* Drop the RB from Tom as the code has changed
---
arch/x86/include/asm/sev.h | 18 +++++++++++++++++-
arch/x86/coco/sev/core.c | 16 ++++++++++++++--
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index fbb616fcbfb8..869355367210 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -223,6 +223,19 @@ struct snp_tsc_info_resp {
u8 rsvd2[100];
} __packed;
+
+/*
+ * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - ((freq) * (factor)) / 100000)
+
struct snp_guest_req {
void *req_buf;
size_t req_sz;
@@ -283,8 +296,11 @@ struct snp_secrets_page {
u8 svsm_guest_vmpl;
u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */
+ u32 tsc_factor;
+
/* Remainder of page */
- u8 rsvd4[3744];
+ u8 rsvd4[3740];
} __packed;
struct snp_msg_desc {
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 8375ca7fbd8a..36f419ff25d4 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -2156,20 +2156,32 @@ void __init snp_secure_tsc_prepare(void)
static unsigned long securetsc_get_tsc_khz(void)
{
- return snp_tsc_freq_khz;
+ return (unsigned long)snp_tsc_freq_khz;
}
void __init snp_secure_tsc_init(void)
{
+ struct snp_secrets_page *secrets;
unsigned long long tsc_freq_mhz;
+ void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
+ if (!mem) {
+ pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
+ }
+
+ secrets = (__force struct snp_secrets_page *)mem;
+
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
- snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
+ snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
+
+ early_memunmap(mem, PAGE_SIZE);
}
base-commit: 49151ac6671fe0372261054daf5e4da3567b8271
--
2.43.0
Commit <4f1492efb495> ("iommu/vt-d: Revert ATS timing change to fix boot
failure") placed the enabling of ATS in the probe_finalize callback. This
occurs after the default domain attachment, which is when the ATS cache
tag is assigned. Consequently, the device TLB cache tag is missed when the
domain is attached, leading to the device TLB not being invalidated in the
iommu_unmap paths.
Fix this by assigning the CACHE_TAG_DEVTLB cache tag when ATS is enabled.
Fixes: 4f1492efb495 ("iommu/vt-d: Revert ATS timing change to fix boot failure")
Cc: stable(a)vger.kernel.org
Suggested-by: Kevin Tian <kevin.tian(a)intel.com>
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Tested-by: Shuicheng Lin <shuicheng.lin(a)intel.com>
---
drivers/iommu/intel/cache.c | 5 ++---
drivers/iommu/intel/iommu.c | 11 ++++++++++-
drivers/iommu/intel/iommu.h | 2 ++
3 files changed, 14 insertions(+), 4 deletions(-)
Change log:
v2:
- The v1 solution has a flaw: ATS will never be enabled for drivers with
driver_managed_dma enabled, as their devices are not expected to be
automatically attached to the default domain.
v1: https://lore.kernel.org/linux-iommu/20250620060802.3036137-1-baolu.lu@linux…
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index fc35cba59145..47692cbfaabd 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
}
/* Assign a cache tag with specified type to domain. */
-static int cache_tag_assign(struct dmar_domain *domain, u16 did,
- struct device *dev, ioasid_t pasid,
- enum cache_tag_type type)
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7aa3932251b2..148b944143b8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev)
!pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev))
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
iommu_enable_pci_ats(info);
+ /* Assign a DEVTLB cache tag to the default domain. */
+ if (info->ats_enabled && info->domain) {
+ u16 did = domain_id_iommu(info->domain, iommu);
+
+ if (cache_tag_assign(info->domain, did, dev,
+ IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
+ iommu_disable_pci_ats(info);
+ }
+ }
iommu_enable_pci_pri(info);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 3ddbcc603de2..2d1afab5eedc 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1289,6 +1289,8 @@ struct cache_tag {
unsigned int users;
};
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type);
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
--
2.43.0
The i2c_dw_xfer_init() function requires msgs and msg_write_idx from the
dev context to be initialized.
amd_i2c_dw_xfer_quirk() inits msgs and msgs_num, but not msg_write_idx.
This could allow an out of bounds access (of msgs).
Initialize msg_write_idx before calling i2c_dw_xfer_init().
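A sketch of the fixed quirk path with the reasoning inline (abridged
from the diff below):

dev->msgs = msgs;
dev->msgs_num = num_msgs;
/*
 * The transfer machinery walks dev->msgs[dev->msg_write_idx..]; a
 * stale index left over from a previous transfer can point past the
 * new, possibly shorter msgs array, so reset the cursor first.
 */
dev->msg_write_idx = 0;
i2c_dw_xfer_init(dev);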
Fixes: 17631e8ca2d3 ("i2c: designware: Add driver support for AMD NAVI GPU")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
---
drivers/i2c/busses/i2c-designware-master.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index c5394229b77f..40aa5114bf8c 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
dev->msgs = msgs;
dev->msgs_num = num_msgs;
+ dev->msg_write_idx = 0;
i2c_dw_xfer_init(dev);
/* Initiate messages read/write transaction */
--
2.49.0
PL1 cannot be disabled on some platforms. The ENABLE bit is still set
after software clears it. This behavior leads to a scenario where, upon
user request to disable the Power Limit through the powercap sysfs, the
ENABLE bit remains set while the CLAMPING bit is inadvertently cleared.
According to the Intel Software Developer's Manual, the CLAMPING bit,
"When set, allows the processor to go below the OS requested P states in
order to maintain the power below specified Platform Power Limit value."
This means the system may operate at higher power levels than intended
on such platforms.
Enhance the code to check the ENABLE bit after writing to it, and stop
further processing if the ENABLE bit cannot be changed.
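The resulting control flow, condensed from the diff below (error
handling abbreviated):

ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
if (ret)
        goto end;
ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val);
if (ret)
        goto end;
if (mode != val)        /* hardware refused the change: skip floor freq */
        goto end;
if (defaults->set_floor_freq)
        defaults->set_floor_freq(rd, mode);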
Cc: stable(a)vger.kernel.org
Reported-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Fixes: 2d281d8196e3 ("PowerCap: Introduce Intel RAPL power capping driver")
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
---
Changes since V1:
- Add Fixes tag
- CC stable kernel
---
drivers/powercap/intel_rapl_common.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index e3be40adc0d7..602f540cbe15 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -341,12 +341,27 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
struct rapl_defaults *defaults = get_defaults(rd->rp);
+ u64 val;
int ret;
cpus_read_lock();
ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
- if (!ret && defaults->set_floor_freq)
+ if (ret)
+ goto end;
+
+ ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val);
+ if (ret)
+ goto end;
+
+ if (mode != val) {
+ pr_debug("%s cannot be %s\n", power_zone->name, mode ? "enabled" : "disabled");
+ goto end;
+ }
+
+ if (defaults->set_floor_freq)
defaults->set_floor_freq(rd, mode);
+
+end:
cpus_read_unlock();
return ret;
--
2.43.0
From: Maíra Canal <mcanal(a)igalia.com>
[ Upstream commit a0e6a017ab56936c0405fe914a793b241ed25ee0 ]
Currently, it is possible for the composer to be set as enabled and then
as disabled without a proper call to vkms_vblank_simulate(). This
is problematic, because the driver would skip one CRC output, causing CRC
tests to fail. Therefore, we need to make sure that, for each time the
composer is set as enabled, a composer job is added to the queue.
In order to provide this guarantee, add a mutex that will lock before
the composer is set as enabled and will unlock only after the composer
job is added to the queue. This way, we can have a guarantee that the
driver won't skip a CRC entry.
This race condition affects the IGT test "writeback-check-output",
making the test fail and also leaking writeback framebuffers, as the
writeback job is queued but never signaled. This patch avoids both
problems.
[v2]:
* Create a new mutex and keep the spinlock across the atomic commit in
order to avoid interrupts that could result in deadlocks.
[ Backport to 5.15: context cleanly applied with no semantic changes.
Build-tested. ]
Signed-off-by: Maíra Canal <mcanal(a)igalia.com>
Reviewed-by: Arthur Grillo <arthurgrillo(a)riseup.net>
Signed-off-by: Maíra Canal <mairacanal(a)riseup.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20230523123207.173976-1-mcana…
Signed-off-by: Pranav Tyagi <pranav.tyagi03(a)gmail.com>
---
drivers/gpu/drm/vkms/vkms_composer.c | 9 +++++++--
drivers/gpu/drm/vkms/vkms_crtc.c | 9 +++++----
drivers/gpu/drm/vkms/vkms_drv.h | 4 +++-
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
index 9e8204be9a14..77fced36af55 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -332,10 +332,15 @@ void vkms_set_composer(struct vkms_output *out, bool enabled)
if (enabled)
drm_crtc_vblank_get(&out->crtc);
- spin_lock_irq(&out->lock);
+ mutex_lock(&out->enabled_lock);
old_enabled = out->composer_enabled;
out->composer_enabled = enabled;
- spin_unlock_irq(&out->lock);
+
+ /* the composition wasn't enabled, so unlock the lock to make sure the lock
+ * will be balanced even if we have a failed commit
+ */
+ if (!out->composer_enabled)
+ mutex_unlock(&out->enabled_lock);
if (old_enabled)
drm_crtc_vblank_put(&out->crtc);
diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c
index 57bbd32e9beb..1b02dee8587a 100644
--- a/drivers/gpu/drm/vkms/vkms_crtc.c
+++ b/drivers/gpu/drm/vkms/vkms_crtc.c
@@ -16,7 +16,7 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
struct drm_crtc *crtc = &output->crtc;
struct vkms_crtc_state *state;
u64 ret_overrun;
- bool ret, fence_cookie;
+ bool ret, fence_cookie, composer_enabled;
fence_cookie = dma_fence_begin_signalling();
@@ -25,15 +25,15 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
if (ret_overrun != 1)
pr_warn("%s: vblank timer overrun\n", __func__);
- spin_lock(&output->lock);
ret = drm_crtc_handle_vblank(crtc);
if (!ret)
DRM_ERROR("vkms failure on handling vblank");
state = output->composer_state;
- spin_unlock(&output->lock);
+ composer_enabled = output->composer_enabled;
+ mutex_unlock(&output->enabled_lock);
- if (state && output->composer_enabled) {
+ if (state && composer_enabled) {
u64 frame = drm_crtc_accurate_vblank_count(crtc);
/* update frame_start only if a queued vkms_composer_worker()
@@ -293,6 +293,7 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
spin_lock_init(&vkms_out->lock);
spin_lock_init(&vkms_out->composer_lock);
+ mutex_init(&vkms_out->enabled_lock);
vkms_out->composer_workq = alloc_ordered_workqueue("vkms_composer", 0);
if (!vkms_out->composer_workq)
diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index d48c23d40ce5..666997e2bcab 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -83,8 +83,10 @@ struct vkms_output {
struct workqueue_struct *composer_workq;
/* protects concurrent access to composer */
spinlock_t lock;
+ /* guarantees that if the composer is enabled, a job will be queued */
+ struct mutex enabled_lock;
- /* protected by @lock */
+ /* protected by @enabled_lock */
bool composer_enabled;
struct vkms_crtc_state *composer_state;
--
2.49.0
USB3 devices connected behind several external suspended hubs may not
be detected when plugged in due to aggressive hub runtime pm suspend.
The hub driver immediately runtime-suspends hubs if there are no
active children or port activity.
There is a delay between the wake signal causing hub resume, and driver
visible port activity on the hub downstream facing ports.
Most of the LFPS handshake, resume signaling and link training done
on the downstream ports is not visible to the hub driver until completed,
when device then will appear fully enabled and running on the port.
This delay between wake signal and detectable port change is even more
significant with chained suspended hubs, where the wake signal
propagates upstream first. Suspended hubs only start resuming
downstream ports after the upstream facing port resumes.
The hub driver may resume a USB3 hub, read the status of all ports,
see no activity yet, and runtime-suspend the hub again before any
port activity becomes visible.
This exact case was seen when connecting USB3 devices to a suspended
Thunderbolt dock.
The USB3 specification defines a 100ms tU3WakeupRetryDelay, indicating
that USB3 devices expect to be resumed within 100ms after signaling
wake; if not, the device will resend the wake signal.
Give USB3 hubs twice this time (200ms) to detect any port changes
after resume, before allowing the hub to runtime suspend again.
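[Editor's note: the fix below is the classic "pin runtime PM, release from
delayed work" pattern. A condensed sketch using the names from the diff
below, for orientation only -- in the actual patch the put happens inside
hub_activate(HUB_POST_RESUME) rather than directly in the work function:

static void hub_post_resume(struct work_struct *ws)
{
	struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work);

	/* 200ms training window has passed: drop the runtime-PM reference
	 * taken at resume so the hub may autosuspend again */
	usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
	hub_put(hub);
}

/* at the tail of HUB_RESUME activation: */
INIT_DELAYED_WORK(&hub->init_work, hub_post_resume);
queue_delayed_work(system_power_efficient_wq, &hub->init_work,
		   msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME));
usb_autopm_get_interface_no_resume(to_usb_interface(hub->intfdev));
]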
Cc: stable(a)vger.kernel.org
Fixes: 2839f5bcfcfc ("USB: Turn on auto-suspend for USB 3.0 hubs.")
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
v2 changes
- Update commit in Fixes tag
- Add Ack from Alan Stern
drivers/usb/core/hub.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 770d1e91183c..5c12dfdef569 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -68,6 +68,12 @@
*/
#define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */
+/*
+ * Give SS hubs 200ms time after wake to train downstream links before
+ * assuming no port activity and allowing hub to runtime suspend back.
+ */
+#define USB_SS_PORT_U0_WAKE_TIME 200 /* ms */
+
/* Protect struct usb_device->state and ->children members
* Note: Both are also protected by ->dev.sem, except that ->state can
* change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */
@@ -1068,11 +1074,12 @@ int usb_remove_device(struct usb_device *udev)
enum hub_activation_type {
HUB_INIT, HUB_INIT2, HUB_INIT3, /* INITs must come first */
- HUB_POST_RESET, HUB_RESUME, HUB_RESET_RESUME,
+ HUB_POST_RESET, HUB_RESUME, HUB_RESET_RESUME, HUB_POST_RESUME,
};
static void hub_init_func2(struct work_struct *ws);
static void hub_init_func3(struct work_struct *ws);
+static void hub_post_resume(struct work_struct *ws);
static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
{
@@ -1095,6 +1102,13 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
goto init2;
goto init3;
}
+
+ if (type == HUB_POST_RESUME) {
+ usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
+ hub_put(hub);
+ return;
+ }
+
hub_get(hub);
/* The superspeed hub except for root hub has to use Hub Depth
@@ -1343,6 +1357,16 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
device_unlock(&hdev->dev);
}
+ if (type == HUB_RESUME && hub_is_superspeed(hub->hdev)) {
+ /* give usb3 downstream links training time after hub resume */
+ INIT_DELAYED_WORK(&hub->init_work, hub_post_resume);
+ queue_delayed_work(system_power_efficient_wq, &hub->init_work,
+ msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME));
+ usb_autopm_get_interface_no_resume(
+ to_usb_interface(hub->intfdev));
+ return;
+ }
+
hub_put(hub);
}
@@ -1361,6 +1385,13 @@ static void hub_init_func3(struct work_struct *ws)
hub_activate(hub, HUB_INIT3);
}
+static void hub_post_resume(struct work_struct *ws)
+{
+ struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work);
+
+ hub_activate(hub, HUB_POST_RESUME);
+}
+
enum hub_quiescing_type {
HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND
};
--
2.43.0
Changes from v1:
* Fix minor typos
* Use the more generic and standard ex_handler_default(). Had the
original code used this helper, the bug would not have been there
in the first place.
--
From: Dave Hansen <dave.hansen(a)linux.intel.com>
Right now, if XRSTOR fails, a console message like this is printed:
Bad FPU state detected at restore_fpregs_from_fpstate+0x9a/0x170, reinitializing FPU registers.
However, the text location (...+0x9a in this case) is the instruction
*AFTER* the XRSTOR. The highlighted instruction in the "Code:" dump
also points one instruction late.
The reason is that the "fixup" moves RIP up past the bad XRSTOR so that
execution keeps running after returning from the #GP handler. But this
fixup is done before the warning is printed.
The resulting warning output is nonsensical because it looks like the
non-FPU-related instruction is #GP'ing.
Do not fix up RIP until after printing the warning. Do this by using
the more generic and standard ex_handler_default().
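[Editor's note: for clarity, here is the fixed handler with the ordering
annotated; this mirrors the diff below:

static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
				 struct pt_regs *regs)
{
	/* Warn first, while regs->ip still points at the faulting XRSTOR,
	 * so %pB resolves the correct symbol+offset. */
	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
		  (void *)instruction_pointer(regs));
	fpu_reset_from_exception_fixup();

	/* ex_handler_default() now performs the regs->ip = ex_fixup_addr(fixup)
	 * step that previously ran before the warning. */
	return ex_handler_default(fixup, regs);
}
]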
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Fixes: d5c8028b4788 ("x86/fpu: Reinitialize FPU registers if restoring FPU state fails")
Acked-by: Alison Schofield <alison.schofield(a)intel.com>
Cc: stable(a)vger.kernel.org
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Chang S. Bae <chang.seok.bae(a)intel.com>
---
b/arch/x86/mm/extable.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff -puN arch/x86/mm/extable.c~fixup-fpu-gp-ip-later arch/x86/mm/extable.c
--- a/arch/x86/mm/extable.c~fixup-fpu-gp-ip-later 2025-06-24 13:58:09.722855233 -0700
+++ b/arch/x86/mm/extable.c 2025-06-24 13:58:09.736856435 -0700
@@ -122,13 +122,12 @@ static bool ex_handler_sgx(const struct
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
- regs->ip = ex_fixup_addr(fixup);
-
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
fpu_reset_from_exception_fixup();
- return true;
+
+ return ex_handler_default(fixup, regs);
}
/*
_
'rx_ring->size' is the count of ring descriptors multiplied by the
size of one descriptor, i.e. a byte count. When the count of ring
descriptors is increased, this value may exceed the pool size limit.
[ 864.209610] page_pool_create_percpu() gave up with errno -7
[ 864.209613] txgbe 0000:11:00.0: Page pool creation failed: -7
Fix to set the pool_size to the count of ring descriptors.
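[Editor's note: a quick userspace illustration of the unit mismatch; the
descriptor layout and ring length are hypothetical, chosen only to show
the scale of the error. Errno 7 is E2BIG:

#include <stdio.h>

/* hypothetical 16-byte Rx descriptor */
struct rx_desc { unsigned long long addr; unsigned long long status; };

int main(void)
{
	unsigned int count = 4096;                            /* ring entries */
	unsigned int size  = count * sizeof(struct rx_desc);  /* 65536 bytes */

	/* page_pool_create() rejects pool sizes above its internal ring
	 * limit, so passing the byte count where an entry count is
	 * expected fails with -7 (-E2BIG), as in the log above. */
	printf("pool_size as entry count: %u\n", count);
	printf("pool_size as byte count:  %u\n", size);
	return 0;
}
]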
Fixes: 850b971110b2 ("net: libwx: Allocate Rx and Tx resources")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jiawen Wu <jiawenwu(a)trustnetic.com>
---
drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 7f2e6cddfeb1..c57cc4f27249 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -2623,7 +2623,7 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring)
struct page_pool_params pp_params = {
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.order = 0,
- .pool_size = rx_ring->size,
+ .pool_size = rx_ring->count,
.nid = dev_to_node(rx_ring->dev),
.dev = rx_ring->dev,
.dma_dir = DMA_FROM_DEVICE,
--
2.48.1
The following commit has been merged into the x86/fpu branch of tip:
Commit-ID: 1cec9ac2d071cfd2da562241aab0ef701355762a
Gitweb: https://git.kernel.org/tip/1cec9ac2d071cfd2da562241aab0ef701355762a
Author: Dave Hansen <dave.hansen(a)linux.intel.com>
AuthorDate: Tue, 24 Jun 2025 14:01:48 -07:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Wed, 25 Jun 2025 16:28:06 -07:00
x86/fpu: Delay instruction pointer fixup until after warning
Right now, if XRSTOR fails, a console message like this is printed:
Bad FPU state detected at restore_fpregs_from_fpstate+0x9a/0x170, reinitializing FPU registers.
However, the text location (...+0x9a in this case) is the instruction
*AFTER* the XRSTOR. The highlighted instruction in the "Code:" dump
also points one instruction late.
The reason is that the "fixup" moves RIP up past the bad XRSTOR so that
execution keeps running after returning from the #GP handler. But this
fixup is done before the warning is printed.
The resulting warning output is nonsensical because it looks like the
non-FPU-related instruction is #GP'ing.
Do not fix up RIP until after printing the warning. Do this by using
the more generic and standard ex_handler_default().
Fixes: d5c8028b4788 ("x86/fpu: Reinitialize FPU registers if restoring FPU state fails")
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Chao Gao <chao.gao(a)intel.com>
Acked-by: Alison Schofield <alison.schofield(a)intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc:stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250624210148.97126F9E%40davehans-spike.ostc.i…
---
arch/x86/mm/extable.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index bf8dab1..2fdc1f1 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -122,13 +122,12 @@ static bool ex_handler_sgx(const struct exception_table_entry *fixup,
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
- regs->ip = ex_fixup_addr(fixup);
-
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
fpu_reset_from_exception_fixup();
- return true;
+
+ return ex_handler_default(fixup, regs);
}
/*
Hello Andy,
On Thu, Jun 26, 2025 at 12:39:26AM +0000, Andy Yang wrote:
> Thank you once again for your efforts on this patch. I also want to
> sincerely apologize for any inconvenience caused by the email reply
> incident.
Don't even think about it :)
We might have been able to avoid this extra fix,
but that's it. No biggie regardless.
Thank you for your help debugging this.
Kind regards,
Niklas
Hi Igor,
[For context, there was a regression report in Debian at
https://bugs.debian.org/1108069]
On Thu, Jun 19, 2025 at 09:36:13PM -0500, Igor Tamara wrote:
> Package: src:linux
> Version: 6.12.32-1
> Severity: normal
> Tags: a11y
>
> Dear Maintainer,
>
> The builtin microphone on my Asus X507UA is recognized but does not
> record; some time ago it worked on Bookworm with image-6.1.0-31.
> Newer images are able to record when appending snd_hda_intel.model=1043:1271
> to the boot command line as a workaround.
>
> The images that work with the boot option appended, but not without
> it, are:
>
> linux-image-6.15-amd64
> linux-image-6.12.32-amd64
> linux-image-6.1.0-37-amd64
> linux-image-6.1.0-0.a.test-amd64-unsigned_6.1.129-1a~test_amd64.deb
> referenced by https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1100928
> Also compiled from upstream 6.12.22 and 6.1.133 with the same result
>
> The image linux-image-6.1.0-31-amd64 worked properly; the problem was
> introduced in 6.1.129, and the result of the bisect was
>
> d26408df0e25f2bd2808d235232ab776e4dd08b9 is the first bad commit
> commit d26408df0e25f2bd2808d235232ab776e4dd08b9
> Author: Kuan-Wei Chiu <visitorckw(a)gmail.com>
> Date: Wed Jan 29 00:54:15 2025 +0800
>
> ALSA: hda: Fix headset detection failure due to unstable sort
>
> commit 3b4309546b48fc167aa615a2d881a09c0a97971f upstream.
>
> The auto_parser assumed sort() was stable, but the kernel's sort() uses
> heapsort, which has never been stable. After commit 0e02ca29a563
> ("lib/sort: optimize heapsort with double-pop variation"), the order of
> equal elements changed, causing the headset to fail to work.
>
> Fix the issue by recording the original order of elements before
> sorting and using it as a tiebreaker for equal elements in the
> comparison function.
>
> Fixes: b9030a005d58 ("ALSA: hda - Use standard sort function in hda_auto_parser.c")
> Reported-by: Austrum <austrum.lab(a)gmail.com>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219158
> Tested-by: Austrum <austrum.lab(a)gmail.com>
> Cc: stable(a)vger.kernel.org
> Signed-off-by: Kuan-Wei Chiu <visitorckw(a)gmail.com>
> Link: https://patch.msgid.link/20250128165415.643223-1-visitorckw@gmail.com
> Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
>
> sound/pci/hda/hda_auto_parser.c | 8 +++++++-
> sound/pci/hda/hda_auto_parser.h | 1 +
> 2 files changed, 8 insertions(+), 1 deletion(-)
>
> I'm attaching the output of alsa-info_alsa-info.sh script
>
> Please let me know if I can provide more information.
Might you be able to try the attached patch, please, to see if it
fixes the issue?
Regards,
Salvatore
The quilt patch titled
Subject: scripts/gdb: fix dentry_name() lookup
has been removed from the -mm tree. Its filename was
scripts-gdb-fix-dentry_name-lookup.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Florian Fainelli <florian.fainelli(a)broadcom.com>
Subject: scripts/gdb: fix dentry_name() lookup
Date: Thu, 19 Jun 2025 15:51:05 -0700
The "d_iname" member was replaced with "d_shortname.string" in the commit
referenced in the Fixes tag. This prevented the GDB script "lx-mount"
command to properly function:
(gdb) lx-mounts
mount super_block devname pathname fstype options
0xff11000002d21180 0xff11000002d24800 rootfs / rootfs rw 0 0
0xff11000002e18a80 0xff11000003713000 /dev/root / ext4 rw,relatime 0 0
Python Exception <class 'gdb.error'>: There is no member named d_iname.
Error occurred in Python: There is no member named d_iname.
Link: https://lkml.kernel.org/r/20250619225105.320729-1-florian.fainelli@broadcom…
Fixes: 58cf9c383c5c ("dcache: back inline names with a struct-wrapped array of unsigned long")
Signed-off-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Jeff Layton <jlayton(a)kernel.org>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/vfs.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/scripts/gdb/linux/vfs.py~scripts-gdb-fix-dentry_name-lookup
+++ a/scripts/gdb/linux/vfs.py
@@ -22,7 +22,7 @@ def dentry_name(d):
if parent == d or parent == 0:
return ""
p = dentry_name(d['d_parent']) + "/"
- return p + d['d_iname'].string()
+ return p + d['d_shortname']['string'].string()
class DentryName(gdb.Function):
"""Return string of the full path of a dentry.
_
Patches currently in -mm which might be from florian.fainelli(a)broadcom.com are
scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch
scripts-gdb-fix-interruptspy-after-maple-tree-conversion.patch
scripts-gdb-de-reference-per-cpu-mce-interrupts.patch
The quilt patch titled
Subject: mm/damon/sysfs-schemes: free old damon_sysfs_scheme_filter->memcg_path on write
has been removed from the -mm tree. Its filename was
mm-damon-sysfs-schemes-free-old-damon_sysfs_scheme_filter-memcg_path-on-write.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/sysfs-schemes: free old damon_sysfs_scheme_filter->memcg_path on write
Date: Thu, 19 Jun 2025 11:36:07 -0700
memcg_path_store() assigns a newly allocated memory buffer to
filter->memcg_path without deallocating the previously allocated and
assigned buffer. As a result, users can leak kernel memory by
repeatedly writing data to the memcg_path DAMOS sysfs file. Fix the
leak by deallocating the previously set memory buffer.
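[Editor's note: a userspace analogue of the bug and the one-line fix,
for illustration only; the kernel code uses kmalloc()/kfree() and
strscpy(), and store_path() here is a hypothetical stand-in:

#include <stdlib.h>
#include <string.h>

static char *stored_path;	/* analogue of filter->memcg_path */

/* analogue of memcg_path_store(): invoked once per sysfs write */
static int store_path(const char *buf, size_t count)
{
	char *path = malloc(count + 1);

	if (!path)
		return -1;
	memcpy(path, buf, count);
	path[count] = '\0';

	free(stored_path);	/* the missing step: release the old buffer */
	stored_path = path;
	return 0;
}
]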
Link: https://lkml.kernel.org/r/20250619183608.6647-2-sj@kernel.org
Fixes: 7ee161f18b5d ("mm/damon/sysfs-schemes: implement filter directory")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.3.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/sysfs-schemes.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/damon/sysfs-schemes.c~mm-damon-sysfs-schemes-free-old-damon_sysfs_scheme_filter-memcg_path-on-write
+++ a/mm/damon/sysfs-schemes.c
@@ -472,6 +472,7 @@ static ssize_t memcg_path_store(struct k
return -ENOMEM;
strscpy(path, buf, count + 1);
+ kfree(filter->memcg_path);
filter->memcg_path = path;
return count;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-introduce-damon_stat-module.patch
mm-damon-introduce-damon_stat-module-fix.patch
mm-damon-stat-calculate-and-expose-estimated-memory-bandwidth.patch
mm-damon-stat-calculate-and-expose-idle-time-percentiles.patch
docs-admin-guide-mm-damon-add-damon_stat-usage-document.patch
mm-damon-paddr-use-alloc_migartion_target-with-no-migration-fallback-nodemask.patch
revert-mm-rename-alloc_demote_folio-to-alloc_migrate_folio.patch
revert-mm-make-alloc_demote_folio-externally-invokable-for-migration.patch
selftets-damon-add-a-test-for-memcg_path-leak.patch
mm-damon-sysfs-schemes-decouple-from-damos_quota_goal_metric.patch
mm-damon-sysfs-schemes-decouple-from-damos_action.patch
mm-damon-sysfs-schemes-decouple-from-damos_wmark_metric.patch
mm-damon-sysfs-schemes-decouple-from-damos_filter_type.patch
mm-damon-sysfs-decouple-from-damon_ops_id.patch
The quilt patch titled
Subject: lib/group_cpus: fix NULL pointer dereference from group_cpus_evenly()
has been removed from the -mm tree. Its filename was
lib-group_cpus-fix-null-pointer-dereference-from-group_cpus_evenly.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yu Kuai <yukuai3(a)huawei.com>
Subject: lib/group_cpus: fix NULL pointer dereference from group_cpus_evenly()
Date: Thu, 19 Jun 2025 21:26:55 +0800
While testing null_blk with configfs, echo 0 > poll_queues triggers
the following panic:
BUG: kernel NULL pointer dereference, address: 0000000000000010
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 27 UID: 0 PID: 920 Comm: bash Not tainted 6.15.0-02023-gadbdb95c8696-dirty #1238 PREEMPT(undef)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014
RIP: 0010:__bitmap_or+0x48/0x70
Call Trace:
<TASK>
__group_cpus_evenly+0x822/0x8c0
group_cpus_evenly+0x2d9/0x490
blk_mq_map_queues+0x1e/0x110
null_map_queues+0xc9/0x170 [null_blk]
blk_mq_update_queue_map+0xdb/0x160
blk_mq_update_nr_hw_queues+0x22b/0x560
nullb_update_nr_hw_queues+0x71/0xf0 [null_blk]
nullb_device_poll_queues_store+0xa4/0x130 [null_blk]
configfs_write_iter+0x109/0x1d0
vfs_write+0x26e/0x6f0
ksys_write+0x79/0x180
__x64_sys_write+0x1d/0x30
x64_sys_call+0x45c4/0x45f0
do_syscall_64+0xa5/0x240
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The root cause is that numgrps is set to 0, so kcalloc() returns
ZERO_SIZE_PTR, which is later dereferenced.
Fix the problem by checking numgrps first in group_cpus_evenly() and
returning NULL directly if numgrps is zero.
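[Editor's note: for context on why the fault address is
0000000000000010 -- zero-sized allocations return a non-NULL sentinel.
A sketch mirroring include/linux/slab.h; alloc_masks() is a
hypothetical condensation of the guarded allocation:

/* include/linux/slab.h: sentinel returned for zero-byte requests */
#define ZERO_SIZE_PTR ((void *)16)

static struct cpumask *alloc_masks(unsigned int numgrps)
{
	/* kcalloc(0, ...) yields ZERO_SIZE_PTR, not NULL; the later
	 * cpumask work in __group_cpus_evenly() dereferences it and
	 * faults at address 0x10, exactly as in the oops above. */
	if (numgrps == 0)
		return NULL;

	return kcalloc(numgrps, sizeof(struct cpumask), GFP_KERNEL);
}
]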
[yukuai3(a)huawei.com: also fix the non-SMP version]
Link: https://lkml.kernel.org/r/20250620010958.1265984-1-yukuai1@huaweicloud.com
Link: https://lkml.kernel.org/r/20250619132655.3318883-1-yukuai1@huaweicloud.com
Fixes: 6a6dcae8f486 ("blk-mq: Build default queue map via group_cpus_evenly()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Jens Axboe <axboe(a)kernel.dk>
Cc: ErKun Yang <yangerkun(a)huawei.com>
Cc: John Garry <john.g.garry(a)oracle.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: "zhangyi (F)" <yi.zhang(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/group_cpus.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/lib/group_cpus.c~lib-group_cpus-fix-null-pointer-dereference-from-group_cpus_evenly
+++ a/lib/group_cpus.c
@@ -352,6 +352,9 @@ struct cpumask *group_cpus_evenly(unsign
int ret = -ENOMEM;
struct cpumask *masks = NULL;
+ if (numgrps == 0)
+ return NULL;
+
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
@@ -426,8 +429,12 @@ struct cpumask *group_cpus_evenly(unsign
#else /* CONFIG_SMP */
struct cpumask *group_cpus_evenly(unsigned int numgrps)
{
- struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
+ struct cpumask *masks;
+ if (numgrps == 0)
+ return NULL;
+
+ masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
if (!masks)
return NULL;
_
Patches currently in -mm which might be from yukuai3(a)huawei.com are
The quilt patch titled
Subject: mm/hugetlb: remove unnecessary holding of hugetlb_lock
has been removed from the -mm tree. Its filename was
mm-hugetlb-remove-unnecessary-holding-of-hugetlb_lock.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: remove unnecessary holding of hugetlb_lock
Date: Tue, 27 May 2025 11:36:50 +0800
In isolate_or_dissolve_huge_folio(), the hugetlb_lock is acquired only
for the purpose of obtaining the correct hstate, which is then passed
to alloc_and_dissolve_hugetlb_folio().
alloc_and_dissolve_hugetlb_folio() itself also acquires the hugetlb_lock.
We can have alloc_and_dissolve_hugetlb_folio() obtain the hstate by
itself, so that isolate_or_dissolve_huge_folio() no longer needs to
acquire the hugetlb_lock. In addition, we keep the folio_test_hugetlb()
check within isolate_or_dissolve_huge_folio(). By doing so, we can avoid
disrupting the normal path by vainly holding the hugetlb_lock.
replace_free_hugepage_folios() has the same issue, and we should address
it as well.
Addresses a possible performance problem which was added by the hotfix
113ed54ad276 ("mm/hugetlb: fix kernel NULL pointer dereference when
replacing free hugetlb folios").
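[Editor's note: condensed from the diff below, the resulting pattern --
validate the folio state and derive the hstate under the lock, inside
the callee -- looks roughly like this; a sketch, not the full function:

static int alloc_and_dissolve_sketch(struct folio *old_folio)
{
	struct hstate *h;

	spin_lock_irq(&hugetlb_lock);
	if (!folio_test_hugetlb(old_folio)) {
		/* Dissolved under our feet; nothing left to do. */
		spin_unlock_irq(&hugetlb_lock);
		return 0;
	}
	h = folio_hstate(old_folio);	/* safe: checked under the lock */
	/* ... allocation and replacement proceed with h, as in the diff ... */
	spin_unlock_irq(&hugetlb_lock);
	return 0;
}
]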
Link: https://lkml.kernel.org/r/1748317010-16272-1-git-send-email-yangge1116@126.…
Fixes: 113ed54ad276 ("mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Suggested-by: Oscar Salvador <osalvador(a)suse.de>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 54 +++++++++++++++----------------------------------
1 file changed, 17 insertions(+), 37 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-remove-unnecessary-holding-of-hugetlb_lock
+++ a/mm/hugetlb.c
@@ -2787,20 +2787,24 @@ void restore_reserve_on_error(struct hst
/*
* alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
* the old one
- * @h: struct hstate old page belongs to
* @old_folio: Old folio to dissolve
* @list: List to isolate the page in case we need to
* Returns 0 on success, otherwise negated error.
*/
-static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
- struct folio *old_folio, struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
+ struct list_head *list)
{
- gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
+ gfp_t gfp_mask;
+ struct hstate *h;
int nid = folio_nid(old_folio);
struct folio *new_folio = NULL;
int ret = 0;
retry:
+ /*
+ * The old_folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
spin_lock_irq(&hugetlb_lock);
if (!folio_test_hugetlb(old_folio)) {
/*
@@ -2829,8 +2833,10 @@ retry:
cond_resched();
goto retry;
} else {
+ h = folio_hstate(old_folio);
if (!new_folio) {
spin_unlock_irq(&hugetlb_lock);
+ gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
NULL, NULL);
if (!new_folio)
@@ -2874,35 +2880,24 @@ free_new:
int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
{
- struct hstate *h;
int ret = -EBUSY;
- /*
- * The page might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- * Return success when racing as if we dissolved the page ourselves.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
+ /* Not to disrupt normal path by vainly holding hugetlb_lock */
+ if (!folio_test_hugetlb(folio))
return 0;
- }
- spin_unlock_irq(&hugetlb_lock);
/*
* Fence off gigantic pages as there is a cyclic dependency between
* alloc_contig_range and them. Return -ENOMEM as this has the effect
* of bailing out right away without further retrying.
*/
- if (hstate_is_gigantic(h))
+ if (folio_order(folio) > MAX_PAGE_ORDER)
return -ENOMEM;
if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list))
ret = 0;
else if (!folio_ref_count(folio))
- ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
+ ret = alloc_and_dissolve_hugetlb_folio(folio, list);
return ret;
}
@@ -2916,7 +2911,6 @@ int isolate_or_dissolve_huge_folio(struc
*/
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
{
- struct hstate *h;
struct folio *folio;
int ret = 0;
@@ -2925,23 +2919,9 @@ int replace_free_hugepage_folios(unsigne
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
- /*
- * The folio might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
- start_pfn++;
- continue;
- }
- spin_unlock_irq(&hugetlb_lock);
-
- if (!folio_ref_count(folio)) {
- ret = alloc_and_dissolve_hugetlb_folio(h, folio,
- &isolate_list);
+ /* Not to disrupt normal path by vainly holding hugetlb_lock */
+ if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) {
+ ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list);
if (ret)
break;
_
Patches currently in -mm which might be from yangge1116(a)126.com are
The quilt patch titled
Subject: fs/proc/task_mmu: fix PAGE_IS_PFNZERO detection for the huge zero folio
has been removed from the -mm tree. Its filename was
fs-proc-task_mmu-fix-page_is_pfnzero-detection-for-the-huge-zero-folio.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: fs/proc/task_mmu: fix PAGE_IS_PFNZERO detection for the huge zero folio
Date: Tue, 17 Jun 2025 16:35:32 +0200
is_zero_pfn() does not work for the huge zero folio. Fix it by using
is_huge_zero_pmd().
This can cause the PAGEMAP_SCAN ioctl against /proc/pid/pagemap to
present pages as PAGE_IS_PRESENT rather than as PAGE_IS_PFNZERO.
Found by code inspection.
Link: https://lkml.kernel.org/r/20250617143532.2375383-1-david@redhat.com
Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/proc/task_mmu.c~fs-proc-task_mmu-fix-page_is_pfnzero-detection-for-the-huge-zero-folio
+++ a/fs/proc/task_mmu.c
@@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_categor
categories |= PAGE_IS_FILE;
}
- if (is_zero_pfn(pmd_pfn(pmd)))
+ if (is_huge_zero_pmd(pmd))
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-remove-vm_bug_ons.patch
mm-gup-remove-vm_bug_ons-fix.patch
mm-huge_memory-dont-ignore-queried-cachemode-in-vmf_insert_pfn_pud.patch
mm-huge_memory-dont-mark-refcounted-folios-special-in-vmf_insert_folio_pmd.patch
mm-huge_memory-dont-mark-refcounted-folios-special-in-vmf_insert_folio_pud.patch
This reverts minor parts of the changes made in commit b338d91703fa
("Bluetooth: Implement support for Mesh"). It looks like these changes
were only made for development purposes but shouldn't have been part of
the commit.
Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Eggers <ceggers(a)arri.de>
---
net/bluetooth/hci_sync.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 6687f2a4d1eb..1f8806dfa556 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1970,13 +1970,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
- int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- err = hci_clear_adv_sets_sync(hdev, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_clear_adv_sets_sync(hdev, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2004,13 +2001,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err = 0;
+ int err;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2109,16 +2104,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
- int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_disable_ext_adv_instance_sync(hdev, 0x00);
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
--
2.43.0
The patch titled
Subject: mm/migrate.c: fix do_pages_stat to use compat_uptr_t
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fix-do_pages_stat-to-use-compat_uptr_t.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Christoph Berg <myon(a)debian.org>
Subject: mm/migrate.c: fix do_pages_stat to use compat_uptr_t
Date: Wed, 25 Jun 2025 17:24:14 +0200
For arrays with more than 16 entries, the old code would incorrectly
advance the pages pointer by 16 native pointer words (8 bytes each on
64-bit) instead of 16 compat_uptr_t entries (4 bytes each).
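[Editor's note: a small userspace illustration of the stride mismatch
on a 64-bit kernel; the array and values are hypothetical:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t compat_uptr_t;	/* 32-bit user pointer, as in the kernel */

int main(void)
{
	const void *arr[16];				/* stand-in for the user array */
	const void **pages = arr;			/* native: 8-byte stride */
	compat_uptr_t *pages32 = (compat_uptr_t *)arr;	/* compat: 4-byte stride */

	/* Advancing past one 16-entry chunk: */
	printf("native advance: %ld bytes\n",
	       (long)((char *)(pages + 16) - (char *)arr));	/* 128 */
	printf("compat advance: %ld bytes\n",
	       (long)((char *)(pages32 + 16) - (char *)arr));	/* 64 */
	return 0;
}
]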
Link: https://lkml.kernel.org/r/aFwUnu7ObizycCZ8@msg.df7cb.de
Signed-off-by: Christoph Berg <myon(a)debian.org>
Suggested-by: Bertrand Drouvot <bertranddrouvot.pg(a)gmail.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Byungchul Park <byungchul(a)sk.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Gregory Price <gourry(a)gourry.net>
Cc: "Huang, Ying" <ying.huang(a)linux.alibaba.com>
Cc: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Rakie Kim <rakie.kim(a)sk.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/mm/migrate.c~fix-do_pages_stat-to-use-compat_uptr_t
+++ a/mm/migrate.c
@@ -2444,7 +2444,13 @@ static int do_pages_stat(struct mm_struc
if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
break;
- pages += chunk_nr;
+ if (in_compat_syscall()) {
+ compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
+
+ pages32 += chunk_nr;
+ pages = (const void __user * __user *) pages32;
+ } else
+ pages += chunk_nr;
status += chunk_nr;
nr_pages -= chunk_nr;
}
_
Patches currently in -mm which might be from myon(a)debian.org are
fix-do_pages_stat-to-use-compat_uptr_t.patch
From: Paulo Alcantara <pc(a)manguebit.org>
Some users and customers reported that their backup/copy tools started
to fail when the directory being copied contained symlink targets that
the client couldn't parse - even when those symlinks weren't followed.
Fix this by allowing lstat(2) and readlink(2) to succeed even when the
client can't resolve the symlink target, restoring old behavior.
Cc: linux-cifs(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Reported-by: Remy Monsen <monsen(a)monsen.cc>
Closes: https://lore.kernel.org/r/CAN+tdP7y=jqw3pBndZAGjQv0ObFq8Q=+PUDHgB36HdEz9QA6…
Reported-by: Pierguido Lambri <plambri(a)redhat.com>
Fixes: 12b466eb52d9 ("cifs: Fix creating and resolving absolute NT-style symlinks")
Signed-off-by: Paulo Alcantara (Red Hat) <pc(a)manguebit.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: David Howells <dhowells(a)redhat.com>
---
fs/smb/client/reparse.c | 20 ++++----------------
1 file changed, 4 insertions(+), 16 deletions(-)
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 511611206dab..1c40e42e4d89 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -875,15 +875,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
abs_path += sizeof("\\DosDevices\\")-1;
else if (strstarts(abs_path, "\\GLOBAL??\\"))
abs_path += sizeof("\\GLOBAL??\\")-1;
- else {
- /* Unhandled absolute symlink, points outside of DOS/Win32 */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
- }
+ else
+ goto out_unhandled_target;
/* Sometimes path separator after \?? is double backslash */
if (abs_path[0] == '\\')
@@ -910,13 +903,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
abs_path++;
abs_path[0] = drive_letter;
} else {
- /* Unhandled absolute symlink. Report an error. */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
+ goto out_unhandled_target;
}
abs_path_len = strlen(abs_path)+1;
@@ -966,6 +953,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
* These paths have same format as Linux symlinks, so no
* conversion is needed.
*/
+out_unhandled_target:
linux_target = smb_target;
smb_target = NULL;
}