From: Miklos Szeredi <mszeredi(a)redhat.com>
commit 97f044f690bac2b094bfb7fb2d177ef946c85880 upstream.
The fuse_iget() call in create_new_entry() already updated the inode with
all the new attributes and incremented the attribute version.
Incrementing the nlink will result in the wrong count. This wasn't noticed
because the attributes were invalidated right after this.
Updating ctime is still needed for the writeback case when the ctime is not
refreshed.
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Signed-off-by: Dmitriy Privalov <d.privalov(a)omp.ru>
---
fs/fuse/dir.c | 29 ++++++++++-------------------
1 file changed, 10 insertions(+), 19 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4488a53a192d..7055fdc1b8ce 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -807,7 +807,7 @@ void fuse_flush_time_update(struct inode *inode)
mapping_set_error(inode->i_mapping, err);
}
-void fuse_update_ctime(struct inode *inode)
+static void fuse_update_ctime_in_cache(struct inode *inode)
{
if (!IS_NOCMTIME(inode)) {
inode->i_ctime = current_time(inode);
@@ -816,6 +816,12 @@ void fuse_update_ctime(struct inode *inode)
}
}
+void fuse_update_ctime(struct inode *inode)
+{
+ fuse_invalidate_attr(inode);
+ fuse_update_ctime_in_cache(inode);
+}
+
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
@@ -986,25 +992,10 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
args.in_args[1].size = newent->d_name.len + 1;
args.in_args[1].value = newent->d_name.name;
err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
- /* Contrary to "normal" filesystems it can happen that link
- makes two "logical" inodes point to the same "physical"
- inode. We invalidate the attributes of the old one, so it
- will reflect changes in the backing inode (link count,
- etc.)
- */
- if (!err) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- spin_lock(&fi->lock);
- fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
- if (likely(inode->i_nlink < UINT_MAX))
- inc_nlink(inode);
- spin_unlock(&fi->lock);
- fuse_invalidate_attr(inode);
- fuse_update_ctime(inode);
- } else if (err == -EINTR) {
+ if (!err)
+ fuse_update_ctime_in_cache(inode);
+ else if (err == -EINTR)
fuse_invalidate_attr(inode);
- }
return err;
}
--
2.34.1
From: Jakob Unterwurzacher <jakob.unterwurzacher(a)cherry.de>
Hardware CS has a very slow rise time of about 6us,
causing transmission errors when CS does not reach
high between transaction.
It looks like it's not driven actively when transitioning
from low to high but switched to input, so only the CPU
pull-up pulls it high, slowly. Transitions from high to low
are fast. On the oscilloscope, CS looks like an irregular sawtooth
pattern like this:
_____
^ / |
^ /| / |
/| / | / |
/ | / | / |
___/ |___/ |_____/ |___
With cs-gpios we have a CS rise time of about 20ns, as it should be,
and CS looks rectangular.
This fixes the data errors when running a flashcp loop against a
m25p40 spi flash.
With the Rockchip 6.1 kernel we see the same slow rise time, but
for some reason CS is always high for long enough to reach a solid
high.
The RK3399 and RK3588 SoCs use the same SPI driver, so we also
checked our "Puma" (RK3399) and "Tiger" (RK3588) boards.
They do not have this problem. Hardware CS rise time is good.
Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard")
Cc: stable(a)vger.kernel.org
Reviewed-by: Quentin Schulz <quentin.schulz(a)cherry.de>
Signed-off-by: Jakob Unterwurzacher <jakob.unterwurzacher(a)cherry.de>
---
.../boot/dts/rockchip/px30-ringneck.dtsi | 22 +++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index ab232e5c7ad6..dcc62dd9b894 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -379,6 +379,18 @@ pmic_int: pmic-int {
<0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
+
+ spi1 {
+ spi1_csn0_gpio: spi1-csn0-gpio {
+ rockchip,pins =
+ <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+
+ spi1_csn1_gpio: spi1-csn1-gpio {
+ rockchip,pins =
+ <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+ };
};
&pmu_io_domains {
@@ -396,6 +408,16 @@ &sdmmc {
vqmmc-supply = <&vccio_sd>;
};
+&spi1 {
+ /*
+ * Hardware CS has a very slow rise time of about 6us,
+ * causing transmission errors.
+ * With cs-gpios we have a rise time of about 20ns.
+ */
+ cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-0 = <&spi1_clk &spi1_csn0_gpio &spi1_csn1_gpio &spi1_miso &spi1_mosi>;
+};
+
&tsadc {
status = "okay";
};
--
2.39.5
In hidg_bind(), if alloc_workqueue() fails after usb_assign_descriptors()
has successfully allocated the USB descriptors, the current error handling
does not call usb_free_all_descriptors() to free the allocated descriptors,
resulting in a memory leak.
Restructure the error handling by adding proper cleanup labels:
- fail_free_all: cleans up workqueue and descriptors
- fail_free_descs: cleans up descriptors only
- fail: original cleanup for earlier failures
This ensures that allocated resources are properly freed in reverse order
of their allocation, preventing the memory leak when alloc_workqueue() fails.
Fixes: a139c98f760ef ("USB: gadget: f_hid: Add GET_REPORT via userspace IOCTL")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yuhao Jiang <danisjiang(a)gmail.com>
---
drivers/usb/gadget/function/f_hid.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 97a62b926415..8e1d1e884050 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -1278,18 +1278,19 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
if (!hidg->workqueue) {
status = -ENOMEM;
- goto fail;
+ goto fail_free_descs;
}
/* create char device */
cdev_init(&hidg->cdev, &f_hidg_fops);
status = cdev_device_add(&hidg->cdev, &hidg->dev);
if (status)
- goto fail_free_descs;
+ goto fail_free_all;
return 0;
-fail_free_descs:
+fail_free_all:
destroy_workqueue(hidg->workqueue);
+fail_free_descs:
usb_free_all_descriptors(f);
fail:
ERROR(f->config->cdev, "hidg_bind FAILED\n");
--
2.43.0
The commit "13bcd440f2ff nvmem: core: verify cell's raw_len" caused an
extension of the "mac-address" cell from 6 to 8 bytes due to word_size
of 4 bytes.
Thus, the required byte swap for the mac-address of the full buffer length,
caused an trucation of the read mac-address.
From the original address 70:B3:D5:14:E9:0E to 00:00:70:B3:D5:14
After swapping only the first 6 bytes, the mac-address is correctly passed
to the upper layers.
Fixes: 13bcd440f2ff ("nvmem: core: verify cell's raw_len")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steffen Bätz <steffen(a)innosonix.de>
---
v2:
- Add Cc: stable(a)vger.kernel.org as requested by Greg KH's patch bot
drivers/nvmem/imx-ocotp-ele.c | 2 ++
drivers/nvmem/imx-ocotp.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/drivers/nvmem/imx-ocotp-ele.c b/drivers/nvmem/imx-ocotp-ele.c
index ca6dd71d8a2e..3af7968f5a34 100644
--- a/drivers/nvmem/imx-ocotp-ele.c
+++ b/drivers/nvmem/imx-ocotp-ele.c
@@ -119,6 +119,8 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index,
/* Deal with some post processing of nvmem cell data */
if (id && !strcmp(id, "mac-address"))
+ if (bytes > 6)
+ bytes = 6;
for (i = 0; i < bytes / 2; i++)
swap(buf[i], buf[bytes - i - 1]);
diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c
index 79dd4fda0329..63e9974d9618 100644
--- a/drivers/nvmem/imx-ocotp.c
+++ b/drivers/nvmem/imx-ocotp.c
@@ -228,6 +228,8 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index,
/* Deal with some post processing of nvmem cell data */
if (id && !strcmp(id, "mac-address"))
+ if (bytes > 6)
+ bytes = 6;
for (i = 0; i < bytes / 2; i++)
swap(buf[i], buf[bytes - i - 1]);
--
2.43.0
--
*innosonix GmbH*
Hauptstr. 35
96482 Ahorn
central: +49 9561 7459980
www.innosonix.de <http://www.innosonix.de>
innosonix GmbH
Geschäftsführer:
Markus Bätz, Steffen Bätz
USt.-IdNr / VAT-Nr.: DE266020313
EORI-Nr.:
DE240121536680271
HRB 5192 Coburg
WEEE-Reg.-Nr. DE88021242
From: Ashish Kalra <ashish.kalra(a)amd.com>
Panic notifiers are invoked with RCU read lock held and when the
SNP panic notifier tries to unregister itself from the panic
notifier callback itself it causes a deadlock as notifier
unregistration does RCU synchronization.
Code flow for SNP panic notifier:
snp_shutdown_on_panic() ->
__sev_firmware_shutdown() ->
__sev_snp_shutdown_locked() ->
atomic_notifier_chain_unregister(.., &snp_panic_notifier)
Fix SNP panic notifier to unregister itself during SNP shutdown
only if panic is not in progress.
Reviewed-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: stable(a)vger.kernel.org
Fixes: 19860c3274fb ("crypto: ccp - Register SNP panic notifier only if SNP is enabled")
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
---
drivers/crypto/ccp/sev-dev.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 8fb94c5f006a..17edc6bf5622 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -1787,8 +1787,14 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
sev->snp_initialized = false;
dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &snp_panic_notifier);
+ /*
+ * __sev_snp_shutdown_locked() deadlocks when it tries to unregister
+ * itself during panic as the panic notifier is called with RCU read
+ * lock held and notifier unregistration does RCU synchronization.
+ */
+ if (!panic)
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &snp_panic_notifier);
/* Reset TMR size back to default */
sev_es_tmr_size = SEV_TMR_SIZE;
--
2.34.1
Greetings,
The following patch [1]:
"42fac18 btrfs: check delayed refs when we're checking if a ref exists"
has been marked as
"CC: stable(a)vger.kernel.org # 5.4+"
but I do not see that it has been backported to linux-6.6.y branch.
Can this patch be picked up in the next version of linux-6.6 please?
Thanks,
Alex.
[1]
commit 42fac187b5c746227c92d024f1caf33bc1d337e4
Author: Josef Bacik <josef(a)toxicpanda.com>
Date: Thu Apr 11 16:41:20 2024 -0400
btrfs: check delayed refs when we're checking if a ref exists
In the patch 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete
resume") I added some code to handle file systems that had been
corrupted by a bug that incorrectly skipped updating the drop progress
key while dropping a snapshot. This code would check to see if we had
already deleted our reference for a child block, and skip the deletion
if we had already.
...
Fixes: 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete resume")
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
[ Upstream commit 6043b794c7668c19dabc4a93c75b924a19474d59 ]
[ Note: Fixed conflict due to unrelated change in
inet_proto_csum_replace_by_diff. ]
During ILA address translations, the L4 checksums can be handled in
different ways. One of them, adj-transport, consist in parsing the
transport layer and updating any found checksum. This logic relies on
inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when
in state CHECKSUM_COMPLETE.
This bug can be reproduced with a simple ILA to SIR mapping, assuming
packets are received with CHECKSUM_COMPLETE:
$ ip a show dev eth0
14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet6 3333:0:0:1::c078/64 scope global
valid_lft forever preferred_lft forever
inet6 fd00:10:244:1::c078/128 scope global nodad
valid_lft forever preferred_lft forever
inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
$ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \
csum-mode adj-transport ident-type luid dev eth0
Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on
[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with
SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed
skb->csum. The translation and drop are visible on pwru [1] traces:
IFACE TUPLE FUNC
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM)
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem
This is happening because inet_proto_csum_replace_by_diff is updating
skb->csum when it shouldn't. The L4 checksum is updated such that it
"cancels" the IPv6 address change in terms of checksum computation, so
the impact on skb->csum is null.
Note this would be different for an IPv4 packet since three fields
would be updated: the IPv4 address, the IP checksum, and the L4
checksum. Two would cancel each other and skb->csum would still need
to be updated to take the L4 checksum change into account.
This patch fixes it by passing an ipv6 flag to
inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're
in the IPv6 case. Note the behavior of the only other user of
inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in
this patch and fixed in the subsequent patch.
With the fix, using the reproduction from above, I can confirm
skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP
SYN proceeds to the application after the ILA translation.
Link: https://github.com/cilium/pwru [1]
Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module")
Signed-off-by: Paul Chaignon <paul.chaignon(a)gmail.com>
Acked-by: Daniel Borkmann <daniel(a)iogearbox.net>
Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.174850948…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paul Chaignon <paul.chaignon(a)gmail.com>
---
include/net/checksum.h | 2 +-
net/core/filter.c | 2 +-
net/core/utils.c | 4 ++--
net/ipv6/ila/ila_common.c | 6 +++---
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/net/checksum.h b/include/net/checksum.h
index d3b5d368a0ca..c975c76b4dd4 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -154,7 +154,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/net/core/filter.c b/net/core/filter.c
index 9d358fb865e2..65b7fb9c3d29 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1970,7 +1970,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
if (unlikely(from != 0))
return -EINVAL;
- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false);
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
diff --git a/net/core/utils.c b/net/core/utils.c
index 1f31a39236d5..d010fcf1dc08 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
EXPORT_SYMBOL(inet_proto_csum_replace16);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr)
+ __wsum diff, bool pseudohdr, bool ipv6)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
skb->csum = ~csum_add(diff, ~skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 95e9146918cc..b8d43ed4689d 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
+ diff, true, true);
}
break;
case NEXTHDR_UDP:
@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
+ diff, true, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
+ diff, true, true);
}
break;
}
--
2.43.0
Control: tag -1 + fixed-upstream
Control: forwarded -1 https://lore.kernel.org/r/20250530221824.work.623-kees@kernel.org
Hello,
On Mon, May 05, 2025 at 05:40:57PM +0200, Ingo Saitz wrote:
> When compiling the linux kernel (tested on 6.15-rc5 and 6.14.5 from
> kernel.org) with CONFIG_RANDSTRUCT enabled, gcc-15 throws an ICE:
>
> arch/x86/kernel/cpu/proc.c:174:14: internal compiler error: in comptypes_check_enum_int, at c/c-typeck.cc:1516
> 174 | const struct seq_operations cpuinfo_op = {
> | ^~~~~~~~~~~~~~
This is claimed to be fixed in upstream by commit
https://git.kernel.org/linus/f39f18f3c3531aa802b58a20d39d96e82eb96c14
that is scheduled to be included in 6.16-rc1.
This wasn't explicitly marked for stable, but I think a backport would
be good.
Best regards
Uwe
In 6.1/6.6 there is no BACKLIGHT_POWER_ON definition so a build error
occurs due to recently backport:
CC drivers/platform/loongarch/loongson-laptop.o
drivers/platform/loongarch/loongson-laptop.c: In function 'laptop_backlight_register':
drivers/platform/loongarch/loongson-laptop.c:428:23: error: 'BACKLIGHT_POWER_ON' undeclared (first use in this function)
428 | props.power = BACKLIGHT_POWER_ON;
| ^~~~~~~~~~~~~~~~~~
Use FB_BLANK_UNBLANK instead which has the same meaning.
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
drivers/platform/loongarch/loongson-laptop.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
index 61b18ac206c9..5fcfa3a7970b 100644
--- a/drivers/platform/loongarch/loongson-laptop.c
+++ b/drivers/platform/loongarch/loongson-laptop.c
@@ -425,7 +425,7 @@ static int laptop_backlight_register(void)
props.max_brightness = status;
props.brightness = ec_get_brightness();
- props.power = BACKLIGHT_POWER_ON;
+ props.power = FB_BLANK_UNBLANK;
props.type = BACKLIGHT_PLATFORM;
backlight_device_register("loongson_laptop",
--
2.47.1
During the High-Speed Isochronous Audio transfers, xHCI
controller on certain AMD platforms experiences momentary data
loss. This results in Missed Service Errors (MSE) being
generated by the xHCI.
The root cause of the MSE is attributed to the ISOC OUT endpoint
being omitted from scheduling. This can happen when an IN
endpoint with a 64ms service interval either is pre-scheduled
prior to the ISOC OUT endpoint or the interval of the ISOC OUT
endpoint is shorter than that of the IN endpoint. Consequently,
the OUT service is neglected when an IN endpoint with a service
interval exceeding 32ms is scheduled concurrently (every 64ms in
this scenario).
This issue is particularly seen on certain older AMD platforms.
To mitigate this problem, it is recommended to adjust the service
interval of the IN endpoint to not exceed 32ms (interval 8). This
adjustment ensures that the OUT endpoint will not be bypassed,
even if a smaller interval value is utilized.
Cc: stable(a)vger.kernel.org
Signed-off-by: Raju Rangoju <Raju.Rangoju(a)amd.com>
---
Changes since v4:
- reword the commit message.
- handle the potential corner case with ISOC IN ep with 64ms ESIT
Changes since v3:
- Bump up the enum number XHCI_LIMIT_ENDPOINT_INTERVAL_9
Changes since v2:
- added stable tag to backport to all stable kernels
Changes since v1:
- replaced hex values with pci device names
- corrected the commit message
---
drivers/usb/host/xhci-mem.c | 4 ++++
drivers/usb/host/xhci-pci.c | 25 +++++++++++++++++++++++++
drivers/usb/host/xhci.h | 1 +
3 files changed, 30 insertions(+)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index bd745a0f2f78..6680afa4f596 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
/* Periodic endpoint bInterval limit quirk */
if (usb_endpoint_xfer_int(&ep->desc) ||
usb_endpoint_xfer_isoc(&ep->desc)) {
+ if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) &&
+ interval >= 9) {
+ interval = 8;
+ }
if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) &&
udev->speed >= USB_SPEED_HIGH &&
interval >= 7) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 0c481cbc8f08..00fac8b233d2 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -71,12 +71,22 @@
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee
+#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5
+#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0
+#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1
+#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5
#define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
+#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316
+
#define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042
#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
#define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242
@@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI))
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
+ if (pdev->vendor == PCI_VENDOR_ID_ATI &&
+ pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI)
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
xhci->quirks |= XHCI_AMD_0x96_HOST;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 49887a303e43..ef7eaabc12fd 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1643,6 +1643,7 @@ struct xhci_hcd {
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
#define XHCI_ETRON_HOST BIT_ULL(49)
+#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.34.1
Hello,
we're facing a regression after updating to 5.15.179+.
I've found that fbdee71bb5d8d054e1bdb5af4c540f2cb86fe296 (block: deprecate autoloading based on dev_t) was merged that disabled autoloading of modules.
This broke mounting loopback devices on our side.
Why wasn't 451f0b6f4c44d7b649ae609157b114b71f6d7875 (block: default BLOCK_LEGACY_AUTOLOAD to y) merged, too?
This commit was merged for 6.1.y, 6.6.y and 6.12y, but not for 5.15.y.
Please consider merging this for 5.15.y soon.
Thanks.
Sebastian
From: Marek Vasut <marex(a)denx.de>
[ Upstream commit 8bad8c923f217d238ba4f1a6d19d761e53bfbd26 ]
The VSELECT pin is configured as MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT
and not as a GPIO, drop the bogus sd-vsel-gpios property as the eSDHC
block handles the VSELECT pin on its own.
Signed-off-by: Marek Vasut <marex(a)denx.de>
Reviewed-by: Frieder Schrempf <frieder.schrempf(a)kontron.de>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
Signed-off-by: Francesco Dolcini <francesco.dolcini(a)toradex.com>
---
6.1.y is currently broken on imx8mm-verdin, because commit
5591ce0069ddda97cdbbea596bed53e698f399c2, that was backported correctly on 6.1,
depends on this one.
This fixes the following error:
[ 1.735149] gpio-regulator: probe of regulator-usdhc2-vqmmc failed with error -16
v2: add missing s-o-b francesco
---
arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index 5b2493bb8dd9..37acaf62f5c7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -362,7 +362,6 @@ pca9450: pmic@25 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pmic>;
reg = <0x25>;
- sd-vsel-gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
/*
* The bootloader is expected to switch on the I2C level shifter for the TLA2024 ADC
--
2.39.5
Hello,
Please consider this series for 6.1.y
This patch series backports two patches that implement a test to verify
that a symbol with KSYM_NAME_LEN of 512 can be read.
The first patch implements the test. This commit also includes a fix
for the test x86/insn_decoder_test. In the case a symbol exceeds the
symbol length limit, an error will happen:
arch/x86/tools/insn_decoder_test: error: malformed line 1152000:
tBb_+0xf2>
..which overflowed by 10 characters reading this line:
ffffffff81458193: 74 3d je
ffffffff814581d2
<_RNvXse_NtNtNtCshGpAVYOtgW1_4core4iter8adapters7flattenINtB5_13FlattenCompatINtNtB7_3map3MapNtNtNtBb_3str4iter5CharsNtB1v_17CharEscapeDefaultENtNtBb_4char13EscapeDefaultENtNtBb_3fmt5Debug3fmtBb_+0xf2>
The fix was proposed in [1] and initially mentioned at [2].
The second patch fixes a warning when building with clang because
there was a definition of unlikely from compiler.h in tools/include/linux,
which conflicted with the one in the instruction decoder selftest.
[1] https://lore.kernel.org/lkml/Y9ES4UKl%2F+DtvAVS@gmail.com/
[2] https://lore.kernel.org/lkml/320c4dba-9919-404b-8a26-a8af16be1845@app.fastm…
Thanks,
Sergio
Nathan Chancellor (1):
x86/tools: Drop duplicate unlikely() definition in insn_decoder_test.c
Sergio González Collado (1):
Kunit to check the longest symbol length
arch/x86/tools/insn_decoder_test.c | 5 +-
lib/Kconfig.debug | 9 ++++
lib/Makefile | 2 +
lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++
4 files changed, 95 insertions(+), 3 deletions(-)
create mode 100644 lib/longest_symbol_kunit.c
base-commit: 58485ff1a74f6c5be9e7c6aafb7293e4337348e7
--
2.39.2
Hello,
Please consider this series for 6.6.y
This patch series backports two patches that implement a test to verify
that a symbol with KSYM_NAME_LEN of 512 can be read.
The first patch implements the test. This commit also includes a fix
for the test x86/insn_decoder_test. In the case a symbol exceeds the
symbol length limit, an error will happen:
arch/x86/tools/insn_decoder_test: error: malformed line 1152000:
tBb_+0xf2>
..which overflowed by 10 characters reading this line:
ffffffff81458193: 74 3d je
ffffffff814581d2
<_RNvXse_NtNtNtCshGpAVYOtgW1_4core4iter8adapters7flattenINtB5_13FlattenCompatINtNtB7_3map3MapNtNtNtBb_3str4iter5CharsNtB1v_17CharEscapeDefaultENtNtBb_4char13EscapeDefaultENtNtBb_3fmt5Debug3fmtBb_+0xf2>
The fix was proposed in [1] and initially mentioned at [2].
The second patch fixes a warning when building with clang because
there was a definition of unlikely from compiler.h in tools/include/linux,
which conflicted with the one in the instruction decoder selftest.
[1] https://lore.kernel.org/lkml/Y9ES4UKl%2F+DtvAVS@gmail.com/
[2] https://lore.kernel.org/lkml/320c4dba-9919-404b-8a26-a8af16be1845@app.fastm…
Thanks,
Sergio
Nathan Chancellor (1):
x86/tools: Drop duplicate unlikely() definition in insn_decoder_test.c
Sergio González Collado (1):
Kunit to check the longest symbol length
arch/x86/tools/insn_decoder_test.c | 5 +-
lib/Kconfig.debug | 9 ++++
lib/Makefile | 2 +
lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++
4 files changed, 95 insertions(+), 3 deletions(-)
create mode 100644 lib/longest_symbol_kunit.c
base-commit: 6282921b6825fef6a1243e1c80063421d41e2576
--
2.39.2
On Thu, Jun 19, 2025 at 10:28:00PM -0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> arm64/cpuinfo: only show one cpu's info in c_show()
>
> to the 6.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> arm64-cpuinfo-only-show-one-cpu-s-info-in-c_show.patch
> and it can be found in the queue-6.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I don't think this one needs to be backported.
Will
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x b0823d5fbacb1c551d793cbfe7af24e0d1fa45ed
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062336-reorder-unaware-0fd1@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b0823d5fbacb1c551d793cbfe7af24e0d1fa45ed Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang(a)linux.intel.com>
Date: Thu, 12 Jun 2025 07:38:18 -0700
Subject: [PATCH] perf/x86/intel: Fix crash in icl_update_topdown_event()
The perf_fuzzer found a hard-lockup crash on a RaptorLake machine:
Oops: general protection fault, maybe for address 0xffff89aeceab400: 0000
CPU: 23 UID: 0 PID: 0 Comm: swapper/23
Tainted: [W]=WARN
Hardware name: Dell Inc. Precision 9660/0VJ762
RIP: 0010:native_read_pmc+0x7/0x40
Code: cc e8 8d a9 01 00 48 89 03 5b cd cc cc cc cc 0f 1f ...
RSP: 000:fffb03100273de8 EFLAGS: 00010046
....
Call Trace:
<TASK>
icl_update_topdown_event+0x165/0x190
? ktime_get+0x38/0xd0
intel_pmu_read_event+0xf9/0x210
__perf_event_read+0xf9/0x210
CPUs 16-23 are E-core CPUs that don't support the perf metrics feature.
The icl_update_topdown_event() should not be invoked on these CPUs.
It's a regression of commit:
f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read")
The bug introduced by that commit is that the is_topdown_event() function
is mistakenly used to replace the is_topdown_count() call to check if the
topdown functions for the perf metrics feature should be invoked.
Fix it.
Fixes: f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read")
Closes: https://lore.kernel.org/lkml/352f0709-f026-cd45-e60c-60dfd97f73f3@maine.edu/
Reported-by: Vince Weaver <vincent.weaver(a)maine.edu>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Vince Weaver <vincent.weaver(a)maine.edu>
Cc: stable(a)vger.kernel.org # v6.15+
Link: https://lore.kernel.org/r/20250612143818.2889040-1-kan.liang@linux.intel.com
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 741b229f0718..c2fb729c270e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event)
* If the PEBS counters snapshotting is enabled,
* the topdown event is available in PEBS records.
*/
- if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+ if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
static_call(intel_pmu_update_topdown_event)(event, NULL);
else
intel_pmu_drain_pebs_buffer();
From: Kairui Song <kasong(a)tencent.com>
The current swap-in code assumes that, when a swap entry in shmem
mapping is order 0, its cached folios (if present) must be order 0
too, which turns out not always correct.
The problem is shmem_split_large_entry is called before verifying the
folio will eventually be swapped in, one possible race is:
CPU1 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
shmem_swapin_folio
/* S1 is swapped in */
shmem_writeout
/* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0,
and folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` will
always return false causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing seeing a larger folio
in swap cache, and check the whole shmem mapping range covered by the
swapin have the right swap value upon inserting the folio. And drop
the redundant tree walks before the insertion.
This will actually improve the performance, as it avoided two redundant
Xarray tree walks in the hot path, and the only side effect is that in
the failure path, shmem may redundantly reallocate a few folios
causing temporary slight memory pressure.
And worth noting, it may seems the order and value check before
inserting might help reducing the lock contention, which is not true.
The swap cache layer ensures raced swapin will either see a swap cache
folio or failed to do a swapin (we have SWAP_HAS_CACHE bit even if
swap cache is bypassed), so holding the folio lock and checking the
folio flag is already good enough for avoiding the lock contention.
The chance that a folio passes the swap entry value check but the
shmem mapping slot has changed should be very low.
Cc: stable(a)vger.kernel.org
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index eda35be2a8d9..4e7ef343a29b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swp_type(swap), folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
--
2.50.0
alloc_tag_top_users() attempts to lock alloc_tag_cttype->mod_lock
even when the alloc_tag_cttype is not allocated because:
1) alloc tagging is disabled because mem profiling is disabled
(!alloc_tag_cttype)
2) alloc tagging is enabled, but not yet initialized (!alloc_tag_cttype)
3) alloc tagging is enabled, but failed initialization
(!alloc_tag_cttype or IS_ERR(alloc_tag_cttype))
In all cases, alloc_tag_cttype is not allocated, and therefore
alloc_tag_top_users() should not attempt to acquire the semaphore.
This leads to a crash on memory allocation failure by attempting to
acquire a non-existent semaphore:
Oops: general protection fault, probably for non-canonical address 0xdffffc000000001b: 0000 [#3] SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x00000000000000d8-0x00000000000000df]
CPU: 2 UID: 0 PID: 1 Comm: systemd Tainted: G D 6.16.0-rc2 #1 VOLUNTARY
Tainted: [D]=DIE
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:down_read_trylock+0xaa/0x3b0
Code: d0 7c 08 84 d2 0f 85 a0 02 00 00 8b 0d df 31 dd 04 85 c9 75 29 48 b8 00 00 00 00 00 fc ff df 48 8d 6b 68 48 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 88 02 00 00 48 3b 5b 68 0f 85 53 01 00 00 65 ff
RSP: 0000:ffff8881002ce9b8 EFLAGS: 00010016
RAX: dffffc0000000000 RBX: 0000000000000070 RCX: 0000000000000000
RDX: 000000000000001b RSI: 000000000000000a RDI: 0000000000000070
RBP: 00000000000000d8 R08: 0000000000000001 R09: ffffed107dde49d1
R10: ffff8883eef24e8b R11: ffff8881002cec20 R12: 1ffff11020059d37
R13: 00000000003fff7b R14: ffff8881002cec20 R15: dffffc0000000000
FS: 00007f963f21d940(0000) GS:ffff888458ca6000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f963f5edf71 CR3: 000000010672c000 CR4: 0000000000350ef0
Call Trace:
<TASK>
codetag_trylock_module_list+0xd/0x20
alloc_tag_top_users+0x369/0x4b0
__show_mem+0x1cd/0x6e0
warn_alloc+0x2b1/0x390
__alloc_frozen_pages_noprof+0x12b9/0x21a0
alloc_pages_mpol+0x135/0x3e0
alloc_slab_page+0x82/0xe0
new_slab+0x212/0x240
___slab_alloc+0x82a/0xe00
</TASK>
As David Wang points out, this issue became easier to trigger after commit
780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init").
Before the commit, the issue occurred only when it failed to allocate
and initialize alloc_tag_cttype or if a memory allocation fails before
alloc_tag_init() is called. After the commit, it can be easily triggered
when memory profiling is compiled but disabled at boot.
To properly determine whether alloc_tag_init() has been called and
its data structures initialized, verify that alloc_tag_cttype is a valid
pointer before acquiring the semaphore. If the variable is NULL or an error
value, it has not been properly initialized. In such a case, just skip
and do not attempt acquire the semaphore.
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202506181351.bba867dd-lkp@intel.com
Fixes: 780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init")
Fixes: 1438d349d16b ("lib: add memory allocations report in show_mem()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
---
v1 -> v2:
- v1 fixed the bug only when MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=n.
v2 now fixes the bug even when MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=y.
I didn't expect alloc_tag_cttype to be NULL when
mem_profiling_support is true, but as David points out (Thanks David!)
if a memory allocation fails before alloc_tag_init(), it can be NULL.
So instead of indirectly checking mem_profiling_support, just directly
check if alloc_tag_cttype is allocated.
- Closes: https://lore.kernel.org/oe-lkp/202505071555.e757f1e0-lkp@intel.com
tag was removed because it was not a crash and not relevant to this
patch.
- Added Cc: stable because, if an allocation fails before
alloc_tag_init(), it can be triggered even prior-780138b12381.
I verified that the bug can be triggered in v6.12 and fixed by this
patch.
It should be quite difficult to trigger in practice, though.
Maybe I'm a bit paranoid?
lib/alloc_tag.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 66a4628185f7..d8ec4c03b7d2 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -124,7 +124,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl
struct codetag_bytes n;
unsigned int i, nr = 0;
- if (can_sleep)
+ if (IS_ERR_OR_NULL(alloc_tag_cttype))
+ return 0;
+ else if (can_sleep)
codetag_lock_module_list(alloc_tag_cttype, true);
else if (!codetag_trylock_module_list(alloc_tag_cttype))
return 0;
--
2.43.0
The patch titled
Subject: mm/ptdump: take the memory hotplug lock inside ptdump_walk_pgd()
has been added to the -mm mm-new branch. Its filename is
mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Subject: mm/ptdump: take the memory hotplug lock inside ptdump_walk_pgd()
Date: Fri, 20 Jun 2025 10:54:27 +0530
Memory hot remove unmaps and tears down various kernel page table regions
as required. The ptdump code can race with concurrent modifications of
the kernel page tables. When leaf entries are modified concurrently, the
dump code may log stale or inconsistent information for a VA range, but
this is otherwise not harmful.
But when intermediate levels of kernel page table are freed, the dump code
will continue to use memory that has been freed and potentially
reallocated for another purpose. In such cases, the ptdump code may
dereference bogus addresses, leading to a number of potential problems.
To avoid the above mentioned race condition, platforms such as arm64,
riscv and s390 take memory hotplug lock, while dumping kernel page table
via the sysfs interface /sys/kernel/debug/kernel_page_tables.
Similar race condition exists while checking for pages that might have
been marked W+X via /sys/kernel/debug/kernel_page_tables/check_wx_pages
which in turn calls ptdump_check_wx(). Instead of solving this race
condition again, let's just move the memory hotplug lock inside generic
ptdump_check_wx() which will benefit both the scenarios.
Drop get_online_mems() and put_online_mems() combination from all existing
platform ptdump code paths.
Link: https://lkml.kernel.org/r/20250620052427.2092093-1-anshuman.khandual@arm.com
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm64/mm/ptdump_debugfs.c | 3 ---
arch/riscv/mm/ptdump.c | 3 ---
arch/s390/mm/dump_pagetables.c | 2 --
mm/ptdump.c | 2 ++
4 files changed, 2 insertions(+), 8 deletions(-)
--- a/arch/arm64/mm/ptdump_debugfs.c~mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd
+++ a/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
--- a/arch/riscv/mm/ptdump.c~mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd
+++ a/arch/riscv/mm/ptdump.c
@@ -6,7 +6,6 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/ptdump.h>
@@ -413,9 +412,7 @@ bool ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- get_online_mems();
ptdump_walk(m, m->private);
- put_online_mems();
return 0;
}
--- a/arch/s390/mm/dump_pagetables.c~mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd
+++ a/arch/s390/mm/dump_pagetables.c
@@ -247,11 +247,9 @@ static int ptdump_show(struct seq_file *
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
--- a/mm/ptdump.c~mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd
+++ a/mm/ptdump.c
@@ -176,6 +176,7 @@ void ptdump_walk_pgd(struct ptdump_state
{
const struct ptdump_range *range = st->range;
+ get_online_mems();
mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_debug(mm, range->start, range->end,
@@ -183,6 +184,7 @@ void ptdump_walk_pgd(struct ptdump_state
range++;
}
mmap_write_unlock(mm);
+ put_online_mems();
/* Flush out the last page */
st->note_page_flush(st);
_
Patches currently in -mm which might be from anshuman.khandual(a)arm.com are
mm-ptdump-take-the-memory-hotplug-lock-inside-ptdump_walk_pgd.patch
The patch titled
Subject: mm,hugetlb: change mechanism to detect a COW on private mapping
has been added to the -mm mm-new branch. Its filename is
mmhugetlb-change-mechanism-to-detect-a-cow-on-private-mapping.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Oscar Salvador <osalvador(a)suse.de>
Subject: mm,hugetlb: change mechanism to detect a COW on private mapping
Date: Fri, 20 Jun 2025 14:30:10 +0200
Patch series "Misc rework on hugetlb faulting path", v2.
This patchset aims to give some love to the hugetlb faulting path, doing
so by removing obsolete comments that are no longer true, sorting out the
folio lock, and changing the mechanism we use to determine whether we are
COWing a private mapping already.
The most important patch of the series is #1, as it fixes a deadlock that
was described in [1], where two processes were holding the same lock for
the folio in the pagecache, and then deadlocked in the mutex.
Looking up and locking the folio in the pagecache was done to check
whether that folio was the same folio we had mapped in our pagetables,
meaning that if it was different we knew that we already mapped that folio
privately, so any further CoW would be made on a private mapping, which
lead us to the question: __Was the reservation for that address
consumed?__ That is all we care about, because if it was indeed consumed
and we are the owner and we cannot allocate more folios, we need to unmap
the folio from the processes pagetables and make it exclusive for us.
We figured we do not need to look up the folio at all, and it is just
enough to check whether the folio we have mapped is anonymous, which means
we mapped it privately, so the reservation was indeed consumed.
Patch #2 sorts out folio locking in the faulting path, reducing the scope
of it ,only taking it when we are dealing with an anonymous folio and
document it. More details in the patch.
Patches #3-5 are cleanups.
hugetlb_wp() checks whether the process is trying to COW on a private
mapping in order to know whether the reservation for that address was
already consumed or not. If it was consumed and we are the ownner of the
mapping, the folio will have to be unmapped from the other processes.
Currently, that check is done by looking up the folio in the pagecache and
comparing it to the folio which is mapped in our pagetables. If it
differs, it means we already mapped it privately before, consuming a
reservation on the way. All we are interested in is whether the mapped
folio is anonymous, so we can simplify and check for that instead.
Also, we transition from a trylock to a folio_lock, since the former was
only needed when hugetlb_fault() had to lock both folios, in order to
avoid deadlock.
Link: https://lkml.kernel.org/r/20250620123014.29748-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20250620123014.29748-2-osalvador@suse.de
Link: https://lore.kernel.org/lkml/20250513093448.592150-1-gavinguo@igalia.com/ [1]
Fixes: 40549ba8f8e0 ("hugetlb: use new vma_lock for pmd sharing synchronization")
Reported-by: Gavin Guo <gavinguo(a)igalia.com>
Closes: https://lore.kernel.org/lkml/20250513093448.592150-1-gavinguo@igalia.com/
Signed-off-by: Oscar Salvador <osalvador(a)suse.de>
Suggested-by: Peter Xu <peterx(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 70 +++++++++++--------------------------------------
1 file changed, 17 insertions(+), 53 deletions(-)
--- a/mm/hugetlb.c~mmhugetlb-change-mechanism-to-detect-a-cow-on-private-mapping
+++ a/mm/hugetlb.c
@@ -6130,8 +6130,7 @@ static void unmap_ref_private(struct mm_
* cannot race with other handlers or page migration.
* Keep the pte_same checks anyway to make transition from the mutex easier.
*/
-static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
- struct vm_fault *vmf)
+static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
@@ -6193,16 +6192,17 @@ retry_avoidcopy:
PageAnonExclusive(&old_folio->page), &old_folio->page);
/*
- * If the process that created a MAP_PRIVATE mapping is about to
- * perform a COW due to a shared page count, attempt to satisfy
- * the allocation without using the existing reserves. The pagecache
- * page is used to determine if the reserve at this address was
- * consumed or not. If reserves were used, a partial faulted mapping
- * at the time of fork() could consume its reserves on COW instead
- * of the full address range.
+ * If the process that created a MAP_PRIVATE mapping is about to perform
+ * a COW due to a shared page count, attempt to satisfy the allocation
+ * without using the existing reserves.
+ * In order to determine where this is a COW on a MAP_PRIVATE mapping it
+ * is enough to check whether the old_folio is anonymous. This means that
+ * the reserve for this address was consumed. If reserves were used, a
+ * partial faulted mapping at the fime of fork() could consume its reserves
+ * on COW instead of the full address range.
*/
if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
- old_folio != pagecache_folio)
+ folio_test_anon(old_folio))
cow_from_owner = true;
folio_get(old_folio);
@@ -6581,7 +6581,7 @@ static vm_fault_t hugetlb_no_page(struct
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(folio, vmf);
+ ret = hugetlb_wp(vmf);
}
spin_unlock(vmf->ptl);
@@ -6648,11 +6648,9 @@ vm_fault_t hugetlb_fault(struct mm_struc
{
vm_fault_t ret;
u32 hash;
- struct folio *folio = NULL;
- struct folio *pagecache_folio = NULL;
+ struct folio *folio;
struct hstate *h = hstate_vma(vma);
struct address_space *mapping;
- int need_wait_lock = 0;
struct vm_fault vmf = {
.vma = vma,
.address = address & huge_page_mask(h),
@@ -6747,8 +6745,7 @@ vm_fault_t hugetlb_fault(struct mm_struc
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
* allocations necessary to record that reservation occur outside the
- * spinlock. Also lookup the pagecache page now as it is used to
- * determine if a reservation has been consumed.
+ * spinlock.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
!(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) {
@@ -6758,11 +6755,6 @@ vm_fault_t hugetlb_fault(struct mm_struc
}
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, vmf.address);
-
- pagecache_folio = filemap_lock_hugetlb_folio(h, mapping,
- vmf.pgoff);
- if (IS_ERR(pagecache_folio))
- pagecache_folio = NULL;
}
vmf.ptl = huge_pte_lock(h, mm, vmf.pte);
@@ -6776,10 +6768,6 @@ vm_fault_t hugetlb_fault(struct mm_struc
(flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
if (!userfaultfd_wp_async(vma)) {
spin_unlock(vmf.ptl);
- if (pagecache_folio) {
- folio_unlock(pagecache_folio);
- folio_put(pagecache_folio);
- }
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return handle_userfault(&vmf, VM_UFFD_WP);
@@ -6791,23 +6779,14 @@ vm_fault_t hugetlb_fault(struct mm_struc
/* Fallthrough to CoW */
}
- /*
- * hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) and
- * pagecache_folio, so here we need take the former one
- * when folio != pagecache_folio or !pagecache_folio.
- */
+ /* hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) */
folio = page_folio(pte_page(vmf.orig_pte));
- if (folio != pagecache_folio)
- if (!folio_trylock(folio)) {
- need_wait_lock = 1;
- goto out_ptl;
- }
-
+ folio_lock(folio);
folio_get(folio);
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
if (!huge_pte_write(vmf.orig_pte)) {
- ret = hugetlb_wp(pagecache_folio, &vmf);
+ ret = hugetlb_wp(&vmf);
goto out_put_page;
} else if (likely(flags & FAULT_FLAG_WRITE)) {
vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
@@ -6818,16 +6797,10 @@ vm_fault_t hugetlb_fault(struct mm_struc
flags & FAULT_FLAG_WRITE))
update_mmu_cache(vma, vmf.address, vmf.pte);
out_put_page:
- if (folio != pagecache_folio)
- folio_unlock(folio);
+ folio_unlock(folio);
folio_put(folio);
out_ptl:
spin_unlock(vmf.ptl);
-
- if (pagecache_folio) {
- folio_unlock(pagecache_folio);
- folio_put(pagecache_folio);
- }
out_mutex:
hugetlb_vma_unlock_read(vma);
@@ -6839,15 +6812,6 @@ out_mutex:
vma_end_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- /*
- * Generally it's safe to hold refcount during waiting page lock. But
- * here we just wait to defer the next page fault to avoid busy loop and
- * the page is not used after unlocked before returning from the current
- * page fault. So we are safe from accessing freed page, even if we wait
- * here without taking refcount.
- */
- if (need_wait_lock)
- folio_wait_locked(folio);
return ret;
}
_
Patches currently in -mm which might be from osalvador(a)suse.de are
mmslub-do-not-special-case-n_normal-nodes-for-slab_nodes.patch
mmmemory_hotplug-remove-status_change_nid_normal-and-update-documentation.patch
mmmemory_hotplug-implement-numa-node-notifier.patch
mmslub-use-node-notifier-instead-of-memory-notifier.patch
mmmemory-tiers-use-node-notifier-instead-of-memory-notifier.patch
driverscxl-use-node-notifier-instead-of-memory-notifier.patch
drivershmat-use-node-notifier-instead-of-memory-notifier.patch
kernelcpuset-use-node-notifier-instead-of-memory-notifier.patch
mmmempolicy-use-node-notifier-instead-of-memory-notifier.patch
mmpage_ext-derive-the-node-from-the-pfn.patch
mmmemory_hotplug-drop-status_change_nid-parameter-from-memory_notify.patch
mmhugetlb-change-mechanism-to-detect-a-cow-on-private-mapping.patch
mmhugetlb-sort-out-folio-locking-in-the-faulting-path.patch
mmhugetlb-rename-anon_rmap-to-new_anon_folio-and-make-it-boolean.patch
mmhugetlb-rename-anon_rmap-to-new_anon_folio-and-make-it-boolean-fix.patch
mmhugetlb-drop-obsolete-comment-about-non-present-pte-and-second-faults.patch
mmhugetlb-drop-unlikelys-from-hugetlb_fault.patch
From: Dominique Martinet <asmadeus(a)codewreck.org>
A buffer overflow vulnerability exists in the USB 9pfs transport layer
where inconsistent size validation between packet header parsing and
actual data copying allows a malicious USB host to overflow heap buffers.
The issue occurs because:
- usb9pfs_rx_header() validates only the declared size in packet header
- usb9pfs_rx_complete() uses req->actual (actual received bytes) for
memcpy
This allows an attacker to craft packets with small declared size
(bypassing validation) but large actual payload (triggering overflow
in memcpy).
Add validation in usb9pfs_rx_complete() to ensure req->actual does not
exceed the buffer capacity before copying data.
Reported-by: Yuhao Jiang <danisjiang(a)gmail.com>
Closes: https://lkml.kernel.org/r/20250616132539.63434-1-danisjiang@gmail.com
Fixes: a3be076dc174 ("net/9p/usbg: Add new usb gadget function transport")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dominique Martinet <asmadeus(a)codewreck.org>
---
(still not actually tested, can't get dummy_hcd/gt to create a device
listed by p9_fwd.py/useable in qemu, I give up..)
Changes in v3:
- fix typo s/req_sizel/req_size/ -- sorry for that, module wasn't
built...
- Link to v2: https://lore.kernel.org/r/20250620-9p-usb_overflow-v2-1-026c6109c7a1@codewr…
Changes in v2:
- run through p9_client_cb() on error
- Link to v1: https://lore.kernel.org/r/20250616132539.63434-1-danisjiang@gmail.com
---
net/9p/trans_usbg.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c
index 6b694f117aef296a66419fed5252305e7a1d0936..468f7e8f0277b9ae5f1bb3c94c649fca97d28857 100644
--- a/net/9p/trans_usbg.c
+++ b/net/9p/trans_usbg.c
@@ -231,6 +231,8 @@ static void usb9pfs_rx_complete(struct usb_ep *ep, struct usb_request *req)
struct f_usb9pfs *usb9pfs = ep->driver_data;
struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
struct p9_req_t *p9_rx_req;
+ unsigned int req_size = req->actual;
+ int status = REQ_STATUS_RCVD;
if (req->status) {
dev_err(&cdev->gadget->dev, "%s usb9pfs complete --> %d, %d/%d\n",
@@ -242,11 +244,19 @@ static void usb9pfs_rx_complete(struct usb_ep *ep, struct usb_request *req)
if (!p9_rx_req)
return;
- memcpy(p9_rx_req->rc.sdata, req->buf, req->actual);
+ if (req_size > p9_rx_req->rc.capacity) {
+ dev_err(&cdev->gadget->dev,
+ "%s received data size %u exceeds buffer capacity %zu\n",
+ ep->name, req_size, p9_rx_req->rc.capacity);
+ req_size = 0;
+ status = REQ_STATUS_ERROR;
+ }
- p9_rx_req->rc.size = req->actual;
+ memcpy(p9_rx_req->rc.sdata, req->buf, req_size);
- p9_client_cb(usb9pfs->client, p9_rx_req, REQ_STATUS_RCVD);
+ p9_rx_req->rc.size = req_size;
+
+ p9_client_cb(usb9pfs->client, p9_rx_req, status);
p9_req_put(usb9pfs->client, p9_rx_req);
complete(&usb9pfs->received);
---
base-commit: 74b4cc9b8780bfe8a3992c9ac0033bf22ac01f19
change-id: 20250620-9p-usb_overflow-25bfc5e9bef3
Best regards,
--
Dominique Martinet <asmadeus(a)codewreck.org>
From: Dominique Martinet <asmadeus(a)codewreck.org>
A buffer overflow vulnerability exists in the USB 9pfs transport layer
where inconsistent size validation between packet header parsing and
actual data copying allows a malicious USB host to overflow heap buffers.
The issue occurs because:
- usb9pfs_rx_header() validates only the declared size in packet header
- usb9pfs_rx_complete() uses req->actual (actual received bytes) for
memcpy
This allows an attacker to craft packets with small declared size
(bypassing validation) but large actual payload (triggering overflow
in memcpy).
Add validation in usb9pfs_rx_complete() to ensure req->actual does not
exceed the buffer capacity before copying data.
Reported-by: Yuhao Jiang <danisjiang(a)gmail.com>
Closes: https://lkml.kernel.org/r/20250616132539.63434-1-danisjiang@gmail.com
Fixes: a3be076dc174 ("net/9p/usbg: Add new usb gadget function transport")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dominique Martinet <asmadeus(a)codewreck.org>
---
Not actually tested, I'll try to find time to figure out how to run with
qemu for real this time...
Changes in v2:
- run through p9_client_cb() on error
- Link to v1: https://lore.kernel.org/r/20250616132539.63434-1-danisjiang@gmail.com
---
net/9p/trans_usbg.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c
index 6b694f117aef296a66419fed5252305e7a1d0936..43078e0d4ca3f4063660f659d28452c81bef10b4 100644
--- a/net/9p/trans_usbg.c
+++ b/net/9p/trans_usbg.c
@@ -231,6 +231,8 @@ static void usb9pfs_rx_complete(struct usb_ep *ep, struct usb_request *req)
struct f_usb9pfs *usb9pfs = ep->driver_data;
struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
struct p9_req_t *p9_rx_req;
+ unsigned int req_size = req->actual;
+ int status = REQ_STATUS_RCVD;
if (req->status) {
dev_err(&cdev->gadget->dev, "%s usb9pfs complete --> %d, %d/%d\n",
@@ -242,11 +244,19 @@ static void usb9pfs_rx_complete(struct usb_ep *ep, struct usb_request *req)
if (!p9_rx_req)
return;
- memcpy(p9_rx_req->rc.sdata, req->buf, req->actual);
+ if (req_size > p9_rx_req->rc.capacity) {
+ dev_err(&cdev->gadget->dev,
+ "%s received data size %u exceeds buffer capacity %zu\n",
+ ep->name, req_size, p9_rx_req->rc.capacity);
+ req_size = 0;
+ status = REQ_STATUS_ERROR;
+ }
- p9_rx_req->rc.size = req->actual;
+ memcpy(p9_rx_req->rc.sdata, req->buf, req_size);
- p9_client_cb(usb9pfs->client, p9_rx_req, REQ_STATUS_RCVD);
+ p9_rx_req->rc.size = req_sizel;
+
+ p9_client_cb(usb9pfs->client, p9_rx_req, status);
p9_req_put(usb9pfs->client, p9_rx_req);
complete(&usb9pfs->received);
---
base-commit: 74b4cc9b8780bfe8a3992c9ac0033bf22ac01f19
change-id: 20250620-9p-usb_overflow-25bfc5e9bef3
Best regards,
--
Dominique Martinet <asmadeus(a)codewreck.org>
The patch titled
Subject: kallsyms: fix build without execinfo
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kallsyms-fix-build-without-execinfo.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Achill Gilgenast <fossdd(a)pwned.life>
Subject: kallsyms: fix build without execinfo
Date: Sun, 22 Jun 2025 03:45:49 +0200
Some libc's like musl libc don't provide execinfo.h since it's not part of
POSIX. In order to fix compilation on musl, only include execinfo.h if
available (HAVE_BACKTRACE_SUPPORT)
This was discovered with c104c16073b7 ("Kunit to check the longest symbol
length") which starts to include linux/kallsyms.h with Alpine Linux'
configs.
Link: https://lkml.kernel.org/r/20250622014608.448718-1-fossdd@pwned.life
Fixes: c104c16073b7 ("Kunit to check the longest symbol length")
Signed-off-by: Achill Gilgenast <fossdd(a)pwned.life>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/include/linux/kallsyms.h | 4 ++++
1 file changed, 4 insertions(+)
--- a/tools/include/linux/kallsyms.h~kallsyms-fix-build-without-execinfo
+++ a/tools/include/linux/kallsyms.h
@@ -18,6 +18,7 @@ static inline const char *kallsyms_looku
return NULL;
}
+#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#include <stdlib.h>
static inline void print_ip_sym(const char *loglvl, unsigned long ip)
@@ -30,5 +31,8 @@ static inline void print_ip_sym(const ch
free(name);
}
+#else
+static inline void print_ip_sym(const char *loglvl, unsigned long ip) {}
+#endif
#endif
_
Patches currently in -mm which might be from fossdd(a)pwned.life are
kallsyms-fix-build-without-execinfo.patch
Hello,
Please consider applying the following commits for 6.12.y:
c104c16073b7 ("Kunit to check the longest symbol length")
f710202b2a45 ("x86/tools: Drop duplicate unlikely() definition in
insn_decoder_test.c")
They should apply cleanly.
Those two commits implement a kunit test to verify that a symbol with
KSYM_NAME_LEN of 512 can be read.
The first commit implements the test. This commit also includes a fix
for the test x86/insn_decoder_test. In the case a symbol exceeds the
symbol length limit, an error will happen:
arch/x86/tools/insn_decoder_test: error: malformed line 1152000:
tBb_+0xf2>
..which overflowed by 10 characters reading this line:
ffffffff81458193: 74 3d je
ffffffff814581d2
<_RNvXse_NtNtNtCshGpAVYOtgW1_4core4iter8adapters7flattenINtB5_13FlattenCompatINtNtB7_3map3MapNtNtNtBb_3str4iter5CharsNtB1v_17CharEscapeDefaultENtNtBb_4char13EscapeDefaultENtNtBb_3fmt5Debug3fmtBb_+0xf2>
The fix was proposed in [1] and initially mentioned at [2].
The second commit fixes a warning when building with clang because
there was a definition of unlikely from compiler.h in tools/include/linux,
which conflicted with the one in the instruction decoder selftest.
[1] https://lore.kernel.org/lkml/Y9ES4UKl%2F+DtvAVS@gmail.com/
[2] https://lore.kernel.org/lkml/320c4dba-9919-404b-8a26-a8af16be1845@app.fastm…
I will send something similar to 6.6.y and 6.1.y
Thanks!
Best regards,
Sergio
commit: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit")
fixes CVE-2024-57996 and commit: b3bf8f63e617 ("net_sched: sch_sfq: move
the limit validation") fixes CVE-2025-37752.
Patches 3 and 5 are CVE fixes for above mentioned CVEs. Patch 1,2 and 4
are pulled in as stable-deps.
Testeing performed on the patches 5.15.185 kernel with the above 5
patches: (Used latest upstream kselftests for tc-testing)
# ./tdc.py -f tc-tests/qdiscs/sfq.json
-- ns/SubPlugin.__init__
Test 7482: Create SFQ with default setting
Test c186: Create SFQ with limit setting
Test ae23: Create SFQ with perturb setting
Test a430: Create SFQ with quantum setting
Test 4539: Create SFQ with divisor setting
Test b089: Create SFQ with flows setting
Test 99a0: Create SFQ with depth setting
Test 7389: Create SFQ with headdrop setting
Test 6472: Create SFQ with redflowlimit setting
Test 8929: Show SFQ class
Test 4d6f: Check that limit of 1 is rejected
Test 7f8f: Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)
Test 5168: Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)
All test results:
1..13
ok 1 7482 - Create SFQ with default setting
ok 2 c186 - Create SFQ with limit setting
ok 3 ae23 - Create SFQ with perturb setting
ok 4 a430 - Create SFQ with quantum setting
ok 5 4539 - Create SFQ with divisor setting
ok 6 b089 - Create SFQ with flows setting
ok 7 99a0 - Create SFQ with depth setting
ok 8 7389 - Create SFQ with headdrop setting
ok 9 6472 - Create SFQ with redflowlimit setting
ok 10 8929 - Show SFQ class
ok 11 4d6f - Check that limit of 1 is rejected
ok 12 7f8f - Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)
ok 13 5168 - Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)
# uname -a
Linux hamogala-vm-6 5.15.185+ #1 SMP Fri Jun 13 18:34:53 GMT 2025 x86_64 x86_64 x86_64 GNU/Linux
I will try to send similar backports to kernels older than 5.15.y as
well.
Thanks,
Harshit
Eric Dumazet (2):
net_sched: sch_sfq: annotate data-races around q->perturb_period
net_sched: sch_sfq: handle bigger packets
Octavian Purdila (3):
net_sched: sch_sfq: don't allow 1 packet limit
net_sched: sch_sfq: use a temporary work area for validating
configuration
net_sched: sch_sfq: move the limit validation
net/sched/sch_sfq.c | 112 ++++++++++++++++++++++++++++----------------
1 file changed, 71 insertions(+), 41 deletions(-)
--
2.47.1
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f4c7baa0699b69edb6887a992283b389761e0e81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062239-erased-bonus-68e2@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4c7baa0699b69edb6887a992283b389761e0e81 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024(a)163.com>
Date: Fri, 16 May 2025 15:16:54 +0300
Subject: [PATCH] drm/i915/display: Add check for alloc_ordered_workqueue() and
alloc_workqueue()
Add check for the return value of alloc_ordered_workqueue()
and alloc_workqueue(). Furthermore, if some allocations fail,
cleanup works are added to avoid potential memory leak problem.
Fixes: 40053823baad ("drm/i915/display: move modeset probe/remove functions to intel_display_driver.c")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Link: https://lore.kernel.org/r/20d3d096c6a4907636f8a1389b3b4dd753ca356e.17473976…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
(cherry picked from commit dcab7a228f4ea9cda3f5b0a1f0679e046d23d7f7)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 5c74ab5fd1aa..411fe7b918a7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -244,31 +244,45 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
intel_dmc_init(display);
display->wq.modeset = alloc_ordered_workqueue("i915_modeset", 0);
+ if (!display->wq.modeset) {
+ ret = -ENOMEM;
+ goto cleanup_vga_client_pw_domain_dmc;
+ }
+
display->wq.flip = alloc_workqueue("i915_flip", WQ_HIGHPRI |
WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+ if (!display->wq.flip) {
+ ret = -ENOMEM;
+ goto cleanup_wq_modeset;
+ }
+
display->wq.cleanup = alloc_workqueue("i915_cleanup", WQ_HIGHPRI, 0);
+ if (!display->wq.cleanup) {
+ ret = -ENOMEM;
+ goto cleanup_wq_flip;
+ }
intel_mode_config_init(display);
ret = intel_cdclk_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_color_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_dbuf_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_bw_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_pmdemand_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
intel_init_quirks(display);
@@ -276,6 +290,12 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
return 0;
+cleanup_wq_cleanup:
+ destroy_workqueue(display->wq.cleanup);
+cleanup_wq_flip:
+ destroy_workqueue(display->wq.flip);
+cleanup_wq_modeset:
+ destroy_workqueue(display->wq.modeset);
cleanup_vga_client_pw_domain_dmc:
intel_dmc_fini(display);
intel_power_domains_driver_remove(display);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f4c7baa0699b69edb6887a992283b389761e0e81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062239-playful-huff-6d2b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4c7baa0699b69edb6887a992283b389761e0e81 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024(a)163.com>
Date: Fri, 16 May 2025 15:16:54 +0300
Subject: [PATCH] drm/i915/display: Add check for alloc_ordered_workqueue() and
alloc_workqueue()
Add check for the return value of alloc_ordered_workqueue()
and alloc_workqueue(). Furthermore, if some allocations fail,
cleanup works are added to avoid potential memory leak problem.
Fixes: 40053823baad ("drm/i915/display: move modeset probe/remove functions to intel_display_driver.c")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Link: https://lore.kernel.org/r/20d3d096c6a4907636f8a1389b3b4dd753ca356e.17473976…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
(cherry picked from commit dcab7a228f4ea9cda3f5b0a1f0679e046d23d7f7)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 5c74ab5fd1aa..411fe7b918a7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -244,31 +244,45 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
intel_dmc_init(display);
display->wq.modeset = alloc_ordered_workqueue("i915_modeset", 0);
+ if (!display->wq.modeset) {
+ ret = -ENOMEM;
+ goto cleanup_vga_client_pw_domain_dmc;
+ }
+
display->wq.flip = alloc_workqueue("i915_flip", WQ_HIGHPRI |
WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+ if (!display->wq.flip) {
+ ret = -ENOMEM;
+ goto cleanup_wq_modeset;
+ }
+
display->wq.cleanup = alloc_workqueue("i915_cleanup", WQ_HIGHPRI, 0);
+ if (!display->wq.cleanup) {
+ ret = -ENOMEM;
+ goto cleanup_wq_flip;
+ }
intel_mode_config_init(display);
ret = intel_cdclk_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_color_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_dbuf_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_bw_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_pmdemand_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
intel_init_quirks(display);
@@ -276,6 +290,12 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
return 0;
+cleanup_wq_cleanup:
+ destroy_workqueue(display->wq.cleanup);
+cleanup_wq_flip:
+ destroy_workqueue(display->wq.flip);
+cleanup_wq_modeset:
+ destroy_workqueue(display->wq.modeset);
cleanup_vga_client_pw_domain_dmc:
intel_dmc_fini(display);
intel_power_domains_driver_remove(display);
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x f4c7baa0699b69edb6887a992283b389761e0e81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062238-motivate-deflate-5b63@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4c7baa0699b69edb6887a992283b389761e0e81 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024(a)163.com>
Date: Fri, 16 May 2025 15:16:54 +0300
Subject: [PATCH] drm/i915/display: Add check for alloc_ordered_workqueue() and
alloc_workqueue()
Add check for the return value of alloc_ordered_workqueue()
and alloc_workqueue(). Furthermore, if some allocations fail,
cleanup works are added to avoid potential memory leak problem.
Fixes: 40053823baad ("drm/i915/display: move modeset probe/remove functions to intel_display_driver.c")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Link: https://lore.kernel.org/r/20d3d096c6a4907636f8a1389b3b4dd753ca356e.17473976…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
(cherry picked from commit dcab7a228f4ea9cda3f5b0a1f0679e046d23d7f7)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 5c74ab5fd1aa..411fe7b918a7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -244,31 +244,45 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
intel_dmc_init(display);
display->wq.modeset = alloc_ordered_workqueue("i915_modeset", 0);
+ if (!display->wq.modeset) {
+ ret = -ENOMEM;
+ goto cleanup_vga_client_pw_domain_dmc;
+ }
+
display->wq.flip = alloc_workqueue("i915_flip", WQ_HIGHPRI |
WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+ if (!display->wq.flip) {
+ ret = -ENOMEM;
+ goto cleanup_wq_modeset;
+ }
+
display->wq.cleanup = alloc_workqueue("i915_cleanup", WQ_HIGHPRI, 0);
+ if (!display->wq.cleanup) {
+ ret = -ENOMEM;
+ goto cleanup_wq_flip;
+ }
intel_mode_config_init(display);
ret = intel_cdclk_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_color_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_dbuf_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_bw_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
ret = intel_pmdemand_init(display);
if (ret)
- goto cleanup_vga_client_pw_domain_dmc;
+ goto cleanup_wq_cleanup;
intel_init_quirks(display);
@@ -276,6 +290,12 @@ int intel_display_driver_probe_noirq(struct intel_display *display)
return 0;
+cleanup_wq_cleanup:
+ destroy_workqueue(display->wq.cleanup);
+cleanup_wq_flip:
+ destroy_workqueue(display->wq.flip);
+cleanup_wq_modeset:
+ destroy_workqueue(display->wq.modeset);
cleanup_vga_client_pw_domain_dmc:
intel_dmc_fini(display);
intel_power_domains_driver_remove(display);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051202-nutrient-upswing-4a86@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavinguo(a)igalia.com>
Date: Mon, 21 Apr 2025 19:35:36 +0800
Subject: [PATCH] mm/huge_memory: fix dereferencing invalid pmd migration entry
When migrating a THP, concurrent access to the PMD migration entry during
a deferred split scan can lead to an invalid address access, as
illustrated below. To prevent this invalid access, it is necessary to
check the PMD migration entry and return early. In this context, there is
no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
equality of the target folio. Since the PMD migration entry is locked, it
cannot be served as the target.
Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
lookup points to a location which may contain the folio of interest, but
might instead contain another folio: and weeding out those other folios is
precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
replacing the wrong folio" comment a few lines above it) is for."
BUG: unable to handle page fault for address: ffffea60001db008
CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
Call Trace:
<TASK>
try_to_migrate_one+0x28c/0x3730
rmap_walk_anon+0x4f6/0x770
unmap_folio+0x196/0x1f0
split_huge_page_to_list_to_order+0x9f6/0x1560
deferred_split_scan+0xac5/0x12a0
shrinker_debugfs_scan_write+0x376/0x470
full_proxy_write+0x15c/0x220
vfs_write+0x2fc/0xcb0
ksys_write+0x146/0x250
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The bug is found by syzkaller on an internal kernel, then confirmed on
upstream.
Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Gavin Shan <gshan(a)redhat.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a47682d1ab7..47d76d03ce30 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
+ bool pmd_migration = is_pmd_migration_entry(*pmd);
+
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- if (folio && folio != pmd_folio(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+ /*
+ * Do not apply pmd_folio() to a migration entry; and folio lock
+ * guarantees that it must be of the wrong folio anyway.
+ */
+ if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}
The patch titled
Subject: ocfs2: kill osb->system_file_mutex lock
has been added to the -mm mm-nonmm-unstable branch. Its filename is
ocfs2-kill-osb-system_file_mutex-lock.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Subject: ocfs2: kill osb->system_file_mutex lock
Date: Sun, 22 Jun 2025 00:56:46 +0900
Since calling _ocfs2_get_system_file_inode() twice with the same arguments
returns the same address, there is no need to serialize
_ocfs2_get_system_file_inode() using osb->system_file_mutex lock.
Kill osb->system_file_mutex lock in order to avoid AB-BA deadlock.
cmpxchg() will be sufficient for avoiding the inode refcount leak problem
which commit 43b10a20372d ("ocfs2: avoid system inode ref confusion by
adding mutex lock") tried to address.
Link: https://lkml.kernel.org/r/934355dd-a0b1-4e53-93ac-0a7ae7458051@I-love.SAKUR…
Reported-by: Diogo Jahchan Koike <djahchankoike(a)gmail.com>
Closes: https://lkml.kernel.org/r/000000000000ff2d7a0620381afe@google.com
Fixes: 43b10a20372d ("ocfs2: avoid system inode ref confusion by adding mutex lock")
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: jiangyiwen <jiangyiwen(a)huawei.com>
Cc: Joseph Qi <joseph.qi(a)huawei.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Mark Fasheh <mfasheh(a)suse.com>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2.h | 2 --
fs/ocfs2/super.c | 2 --
fs/ocfs2/sysfile.c | 9 +++------
3 files changed, 3 insertions(+), 10 deletions(-)
--- a/fs/ocfs2/ocfs2.h~ocfs2-kill-osb-system_file_mutex-lock
+++ a/fs/ocfs2/ocfs2.h
@@ -494,8 +494,6 @@ struct ocfs2_super
struct rb_root osb_rf_lock_tree;
struct ocfs2_refcount_tree *osb_ref_tree_lru;
- struct mutex system_file_mutex;
-
/*
* OCFS2 needs to schedule several different types of work which
* require cluster locking, disk I/O, recovery waits, etc. Since these
--- a/fs/ocfs2/super.c~ocfs2-kill-osb-system_file_mutex-lock
+++ a/fs/ocfs2/super.c
@@ -1997,8 +1997,6 @@ static int ocfs2_initialize_super(struct
spin_lock_init(&osb->osb_xattr_lock);
ocfs2_init_steal_slots(osb);
- mutex_init(&osb->system_file_mutex);
-
atomic_set(&osb->alloc_stats.moves, 0);
atomic_set(&osb->alloc_stats.local_data, 0);
atomic_set(&osb->alloc_stats.bitmap_data, 0);
--- a/fs/ocfs2/sysfile.c~ocfs2-kill-osb-system_file_mutex-lock
+++ a/fs/ocfs2/sysfile.c
@@ -98,11 +98,9 @@ struct inode *ocfs2_get_system_file_inod
} else
arr = get_local_system_inode(osb, type, slot);
- mutex_lock(&osb->system_file_mutex);
if (arr && ((inode = *arr) != NULL)) {
/* get a ref in addition to the array ref */
inode = igrab(inode);
- mutex_unlock(&osb->system_file_mutex);
BUG_ON(!inode);
return inode;
@@ -112,11 +110,10 @@ struct inode *ocfs2_get_system_file_inod
inode = _ocfs2_get_system_file_inode(osb, type, slot);
/* add one more if putting into array for first time */
- if (arr && inode) {
- *arr = igrab(inode);
- BUG_ON(!*arr);
+ if (inode && arr && !*arr && !cmpxchg(&(*arr), NULL, inode)) {
+ inode = igrab(inode);
+ BUG_ON(!inode);
}
- mutex_unlock(&osb->system_file_mutex);
return inode;
}
_
Patches currently in -mm which might be from penguin-kernel(a)I-love.SAKURA.ne.jp are
ocfs2-kill-osb-system_file_mutex-lock.patch
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051204-tidal-lake-6ae7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavinguo(a)igalia.com>
Date: Mon, 21 Apr 2025 19:35:36 +0800
Subject: [PATCH] mm/huge_memory: fix dereferencing invalid pmd migration entry
When migrating a THP, concurrent access to the PMD migration entry during
a deferred split scan can lead to an invalid address access, as
illustrated below. To prevent this invalid access, it is necessary to
check the PMD migration entry and return early. In this context, there is
no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
equality of the target folio. Since the PMD migration entry is locked, it
cannot be served as the target.
Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
lookup points to a location which may contain the folio of interest, but
might instead contain another folio: and weeding out those other folios is
precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
replacing the wrong folio" comment a few lines above it) is for."
BUG: unable to handle page fault for address: ffffea60001db008
CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
Call Trace:
<TASK>
try_to_migrate_one+0x28c/0x3730
rmap_walk_anon+0x4f6/0x770
unmap_folio+0x196/0x1f0
split_huge_page_to_list_to_order+0x9f6/0x1560
deferred_split_scan+0xac5/0x12a0
shrinker_debugfs_scan_write+0x376/0x470
full_proxy_write+0x15c/0x220
vfs_write+0x2fc/0xcb0
ksys_write+0x146/0x250
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The bug is found by syzkaller on an internal kernel, then confirmed on
upstream.
Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Gavin Shan <gshan(a)redhat.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a47682d1ab7..47d76d03ce30 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
+ bool pmd_migration = is_pmd_migration_entry(*pmd);
+
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- if (folio && folio != pmd_folio(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+ /*
+ * Do not apply pmd_folio() to a migration entry; and folio lock
+ * guarantees that it must be of the wrong folio anyway.
+ */
+ if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051206-t-shirt-wrist-ad33@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavinguo(a)igalia.com>
Date: Mon, 21 Apr 2025 19:35:36 +0800
Subject: [PATCH] mm/huge_memory: fix dereferencing invalid pmd migration entry
When migrating a THP, concurrent access to the PMD migration entry during
a deferred split scan can lead to an invalid address access, as
illustrated below. To prevent this invalid access, it is necessary to
check the PMD migration entry and return early. In this context, there is
no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
equality of the target folio. Since the PMD migration entry is locked, it
cannot be served as the target.
Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
lookup points to a location which may contain the folio of interest, but
might instead contain another folio: and weeding out those other folios is
precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
replacing the wrong folio" comment a few lines above it) is for."
BUG: unable to handle page fault for address: ffffea60001db008
CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
Call Trace:
<TASK>
try_to_migrate_one+0x28c/0x3730
rmap_walk_anon+0x4f6/0x770
unmap_folio+0x196/0x1f0
split_huge_page_to_list_to_order+0x9f6/0x1560
deferred_split_scan+0xac5/0x12a0
shrinker_debugfs_scan_write+0x376/0x470
full_proxy_write+0x15c/0x220
vfs_write+0x2fc/0xcb0
ksys_write+0x146/0x250
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The bug is found by syzkaller on an internal kernel, then confirmed on
upstream.
Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Gavin Shan <gshan(a)redhat.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a47682d1ab7..47d76d03ce30 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
+ bool pmd_migration = is_pmd_migration_entry(*pmd);
+
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- if (folio && folio != pmd_folio(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+ /*
+ * Do not apply pmd_folio() to a migration entry; and folio lock
+ * guarantees that it must be of the wrong folio anyway.
+ */
+ if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051205-work-bronze-e167@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavinguo(a)igalia.com>
Date: Mon, 21 Apr 2025 19:35:36 +0800
Subject: [PATCH] mm/huge_memory: fix dereferencing invalid pmd migration entry
When migrating a THP, concurrent access to the PMD migration entry during
a deferred split scan can lead to an invalid address access, as
illustrated below. To prevent this invalid access, it is necessary to
check the PMD migration entry and return early. In this context, there is
no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
equality of the target folio. Since the PMD migration entry is locked, it
cannot be served as the target.
Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
lookup points to a location which may contain the folio of interest, but
might instead contain another folio: and weeding out those other folios is
precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
replacing the wrong folio" comment a few lines above it) is for."
BUG: unable to handle page fault for address: ffffea60001db008
CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
Call Trace:
<TASK>
try_to_migrate_one+0x28c/0x3730
rmap_walk_anon+0x4f6/0x770
unmap_folio+0x196/0x1f0
split_huge_page_to_list_to_order+0x9f6/0x1560
deferred_split_scan+0xac5/0x12a0
shrinker_debugfs_scan_write+0x376/0x470
full_proxy_write+0x15c/0x220
vfs_write+0x2fc/0xcb0
ksys_write+0x146/0x250
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The bug is found by syzkaller on an internal kernel, then confirmed on
upstream.
Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Gavin Shan <gshan(a)redhat.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a47682d1ab7..47d76d03ce30 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
+ bool pmd_migration = is_pmd_migration_entry(*pmd);
+
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- if (folio && folio != pmd_folio(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+ /*
+ * Do not apply pmd_folio() to a migration entry; and folio lock
+ * guarantees that it must be of the wrong folio anyway.
+ */
+ if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}
From: Colin Foster <colin.foster(a)in-advantage.com>
commit b9bf5612610aa7e38d58fee16f489814db251c01 upstream.
Prior to commit df16c1c51d81 ("net: phy: mdio_device: Reset device only
when necessary") MDIO reset deasserts were performed twice during boot.
Now that the second deassert is no longer performed, device probe
failures happen due to the change in timing with the following error
message:
SMSC LAN8710/LAN8720: probe of 4a101000.mdio:00 failed with error -5
Restore the original effective timing, which resolves the probe
failures.
Signed-off-by: Colin Foster <colin.foster(a)in-advantage.com>
Link: https://lore.kernel.org/r/20240531183817.2698445-1-colin.foster@in-advantag…
Signed-off-by: Kevin Hilman <khilman(a)baylibre.com>
Signed-off-by: Nobuhiro Iwamatsu (CIP) <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
index 96451c8a815c..4867ff28c97e 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
@@ -385,7 +385,7 @@
/* Support GPIO reset on revision C3 boards */
reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
reset-assert-us = <300>;
- reset-deassert-us = <6500>;
+ reset-deassert-us = <13000>;
};
};
--
2.25.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x af98b0157adf6504fade79b3e6cb260c4ff68e37
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062052-vigorous-overlaid-8bec@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From af98b0157adf6504fade79b3e6cb260c4ff68e37 Mon Sep 17 00:00:00 2001
From: Jeongjun Park <aha310510(a)gmail.com>
Date: Wed, 14 May 2025 22:08:55 +0900
Subject: [PATCH] jbd2: fix data-race and null-ptr-deref in
jbd2_journal_dirty_metadata()
Since handle->h_transaction may be a NULL pointer, so we should change it
to call is_handle_aborted(handle) first before dereferencing it.
And the following data-race was reported in my fuzzer:
==================================================================
BUG: KCSAN: data-race in jbd2_journal_dirty_metadata / jbd2_journal_dirty_metadata
write to 0xffff888011024104 of 4 bytes by task 10881 on cpu 1:
jbd2_journal_dirty_metadata+0x2a5/0x770 fs/jbd2/transaction.c:1556
__ext4_handle_dirty_metadata+0xe7/0x4b0 fs/ext4/ext4_jbd2.c:358
ext4_do_update_inode fs/ext4/inode.c:5220 [inline]
ext4_mark_iloc_dirty+0x32c/0xd50 fs/ext4/inode.c:5869
__ext4_mark_inode_dirty+0xe1/0x450 fs/ext4/inode.c:6074
ext4_dirty_inode+0x98/0xc0 fs/ext4/inode.c:6103
....
read to 0xffff888011024104 of 4 bytes by task 10880 on cpu 0:
jbd2_journal_dirty_metadata+0xf2/0x770 fs/jbd2/transaction.c:1512
__ext4_handle_dirty_metadata+0xe7/0x4b0 fs/ext4/ext4_jbd2.c:358
ext4_do_update_inode fs/ext4/inode.c:5220 [inline]
ext4_mark_iloc_dirty+0x32c/0xd50 fs/ext4/inode.c:5869
__ext4_mark_inode_dirty+0xe1/0x450 fs/ext4/inode.c:6074
ext4_dirty_inode+0x98/0xc0 fs/ext4/inode.c:6103
....
value changed: 0x00000000 -> 0x00000001
==================================================================
This issue is caused by missing data-race annotation for jh->b_modified.
Therefore, the missing annotation needs to be added.
Reported-by: syzbot+de24c3fe3c4091051710(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=de24c3fe3c4091051710
Fixes: 6e06ae88edae ("jbd2: speedup jbd2_journal_dirty_metadata()")
Signed-off-by: Jeongjun Park <aha310510(a)gmail.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://patch.msgid.link/20250514130855.99010-1-aha310510@gmail.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cbc4785462f5..c7867139af69 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1509,7 +1509,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction == transaction);
spin_unlock(&jh->b_state_lock);
}
- if (jh->b_modified == 1) {
+ if (data_race(jh->b_modified == 1)) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (data_race(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)) {
@@ -1528,7 +1528,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out;
}
- journal = transaction->t_journal;
spin_lock(&jh->b_state_lock);
if (is_handle_aborted(handle)) {
@@ -1543,6 +1542,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out_unlock_bh;
}
+ journal = transaction->t_journal;
+
if (jh->b_modified == 0) {
/*
* This buffer's got modified and becoming part
This reverts commit 7adb96687ce8819de5c7bb172c4eeb6e45736e06.
commit 7adb96687ce8 ("x86/bugs: Make spectre user default depend on
MITIGATION_SPECTRE_V2") depends on commit 72c70f480a70 ("x86/bugs: Add
a separate config for Spectre V2"), which introduced
MITIGATION_SPECTRE_V2.
commit 72c70f480a70 ("x86/bugs: Add a separate config for Spectre V2")
never landed in stable tree, thus, stable tree doesn't have
MITIGATION_SPECTRE_V2, that said, commit 7adb96687ce8 ("x86/bugs: Make
spectre user default depend on MITIGATION_SPECTRE_V2") has no value if
the dependecy was not applied.
Revert commit 7adb96687ce8 ("x86/bugs: Make spectre user default
depend on MITIGATION_SPECTRE_V2") in stable kernel which landed in in
5.4.294, 5.10.238, 5.15.185, 6.1.141 and 6.6.93 stable versions.
Cc: David.Kaplan(a)amd.com
Cc: peterz(a)infradead.org
Cc: pawan.kumar.gupta(a)linux.intel.com
Cc: mingo(a)kernel.org
Cc: brad.spengler(a)opensrcsec.com
Cc: stable(a)vger.kernel.org # 6.6 6.1 5.15 5.10 5.4
Reported-by: Brad Spengler <brad.spengler(a)opensrcsec.com>
Reported-by: Salvatore Bonaccorso <carnil(a)debian.org>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
PS: This patch is only for stable (6.6 and older).
---
Documentation/admin-guide/kernel-parameters.txt | 2 --
arch/x86/kernel/cpu/bugs.c | 10 +++-------
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 315a817e33804..f95734ceb82b8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5978,8 +5978,6 @@
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
- Selecting specific mitigation does not force enable
- user mitigations.
Selecting 'off' will disable both the kernel and
the user space protections.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e9c4bcb38f458..07b45bbf6348d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1442,13 +1442,9 @@ static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(void)
{
- enum spectre_v2_user_cmd mode;
char arg[20];
int ret, i;
- mode = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ?
- SPECTRE_V2_USER_CMD_AUTO : SPECTRE_V2_USER_CMD_NONE;
-
switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
@@ -1461,7 +1457,7 @@ spectre_v2_parse_user_cmdline(void)
ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
arg, sizeof(arg));
if (ret < 0)
- return mode;
+ return SPECTRE_V2_USER_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
if (match_option(arg, ret, v2_user_options[i].option)) {
@@ -1471,8 +1467,8 @@ spectre_v2_parse_user_cmdline(void)
}
}
- pr_err("Unknown user space protection option (%s). Switching to default\n", arg);
- return mode;
+ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_USER_CMD_AUTO;
}
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
---
base-commit: 6282921b6825fef6a1243e1c80063421d41e2576
change-id: 20250620-stable_revert_66-092ba4e66bec
prerequisite-change-id: 20250620-stable_revert-1809dd16f554:v1
Best regards,
--
Breno Leitao <leitao(a)debian.org>
It is better to print out the non supported num_dmics than printing that
it is not matching with 2 or 4.
Fixes: 2fbeff33381c ("ASoC: Intel: add sof_sdw_get_tplg_files ops")
Cc: stable(a)vger.kernel.org
Signed-off-by: Peter Ujfalusi <peter.ujfalusi(a)linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.dev>
---
sound/soc/intel/common/sof-function-topology-lib.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c
index 90fe7aa3df1c..3cc81dcf047e 100644
--- a/sound/soc/intel/common/sof-function-topology-lib.c
+++ b/sound/soc/intel/common/sof-function-topology-lib.c
@@ -73,7 +73,8 @@ int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_
break;
default:
dev_warn(card->dev,
- "only -2ch and -4ch are supported for dmic\n");
+ "unsupported number of dmics: %d\n",
+ mach_params.dmic_num);
continue;
}
tplg_dev = TPLG_DEVICE_INTEL_PCH_DMIC;
--
2.49.0
From: "David (Ming Qiang) Wu" <David.Wu3(a)amd.com>
On VCN v4.0.5 there is a race condition where the WPTR is not
updated after starting from idle when doorbell is used. Adding
register read-back after written at function end is to ensure
all register writes are done before they can be used.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528
Signed-off-by: David (Ming Qiang) Wu <David.Wu3(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
Reviewed-by: Ruijing Dong <ruijing.dong(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
(cherry picked from commit ee7360fc27d6045510f8fe459b5649b2af27811a)
Hand modified for contextual changes where there is a for loop
in 6.12 that was dropped later on.
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index e0b02bf1c563..3d114ea7049f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -985,6 +985,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
return 0;
}
@@ -1167,6 +1171,10 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev)
tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
}
return 0;
--
2.43.0
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 9c49e5d09f076001e05537734d7df002162eb2b5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062020-gambling-poker-8b0c@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9c49e5d09f076001e05537734d7df002162eb2b5 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj(a)kernel.org>
Date: Mon, 2 Jun 2025 10:49:26 -0700
Subject: [PATCH] mm/madvise: handle madvise_lock() failure during race
unwinding
When unwinding race on -ERESTARTNOINTR handling of process_madvise(),
madvise_lock() failure is ignored. Check the failure and abort remaining
works in the case.
Link: https://lkml.kernel.org/r/20250602174926.1074-1-sj@kernel.org
Fixes: 4000e3d0a367 ("mm/madvise: remove redundant mmap_lock operations from process_madvise()")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reported-by: Barry Song <21cnbao(a)gmail.com>
Closes: https://lore.kernel.org/CAGsJ_4xJXXO0G+4BizhohSZ4yDteziPw43_uF8nPXPWxUVChzw…
Reviewed-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 8433ac9b27e0..5f7a66a1617e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1881,7 +1881,9 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
/* Drop and reacquire lock to unwind race. */
madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);
- madvise_lock(mm, behavior);
+ ret = madvise_lock(mm, behavior);
+ if (ret)
+ goto out;
madvise_init_tlb(&madv_behavior, mm);
continue;
}
@@ -1892,6 +1894,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);
+out:
ret = (total_len - iov_iter_count(iter)) ? : ret;
return ret;
From: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
[ Upstream commit b43b1e2c52db77c872bd60d30cdcc72c47df70c7 ]
If a shared IRQ is used by the driver due to platform limitation, then the
IRQ affinity hint is set right after the allocation of IRQ vectors in
ath12k_pci_msi_alloc(). This does no harm unless one of the functions
requesting the IRQ fails and attempt to free the IRQ.
This may end up with a warning from the IRQ core that is expecting the
affinity hint to be cleared before freeing the IRQ:
kernel/irq/manage.c:
/* make sure affinity_hint is cleaned up */
if (WARN_ON_ONCE(desc->affinity_hint))
desc->affinity_hint = NULL;
So to fix this issue, clear the IRQ affinity hint before calling
ath12k_pci_free_irq() in the error path. The affinity will be cleared once
again further down the error path due to code organization, but that does
no harm.
Fixes: a3012f206d07 ("wifi: ath12k: set IRQ affinity to CPU0 in case of one MSI vector")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Reviewed-by: Baochen Qiang <quic_bqiang(a)quicinc.com>
Link: https://patch.msgid.link/20250225053447.16824-3-manivannan.sadhasivam@linar…
Signed-off-by: Jeff Johnson <jeff.johnson(a)oss.qualcomm.com>
Signed-off-by: Wenshan Lan <jetlan9(a)163.com>
---
drivers/net/wireless/ath/ath12k/pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c
index 45d537066345..9c35fbe52872 100644
--- a/drivers/net/wireless/ath/ath12k/pci.c
+++ b/drivers/net/wireless/ath/ath12k/pci.c
@@ -1503,6 +1503,8 @@ static int ath12k_pci_probe(struct pci_dev *pdev,
return 0;
err_free_irq:
+ /* __free_irq() expects the caller to have cleared the affinity hint */
+ ath12k_pci_set_irq_affinity_hint(ab_pci, NULL);
ath12k_pci_free_irq(ab);
err_ce_free:
--
2.34.1
On 6/20/25 4:32 AM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> openvswitch: Stricter validation for the userspace action
>
> to the 6.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> openvswitch-stricter-validation-for-the-userspace-ac.patch
> and it can be found in the queue-6.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
FWIW, backporting of this change was previously discussed here:
https://lore.kernel.org/netdev/2025060520-slacking-swimmer-1b31@gregkh/
With the conclusion to drop it as it's not a bug fix and hence there is
no reason to backport it.
Best regards, Ilya Maximets.
>
>
>
> commit 77c2ef6608f0cb47cbcc0d3e0a4371e35f70e125
> Author: Eelco Chaudron <echaudro(a)redhat.com>
> Date: Mon May 12 10:08:24 2025 +0200
>
> openvswitch: Stricter validation for the userspace action
>
> [ Upstream commit 88906f55954131ed2d3974e044b7fb48129b86ae ]
>
> This change enhances the robustness of validate_userspace() by ensuring
> that all Netlink attributes are fully contained within the parent
> attribute. The previous use of nla_parse_nested_deprecated() could
> silently skip trailing or malformed attributes, as it stops parsing at
> the first invalid entry.
>
> By switching to nla_parse_deprecated_strict(), we make sure only fully
> validated attributes are copied for later use.
>
> Signed-off-by: Eelco Chaudron <echaudro(a)redhat.com>
> Reviewed-by: Simon Horman <horms(a)kernel.org>
> Acked-by: Ilya Maximets <i.maximets(a)ovn.org>
> Link: https://patch.msgid.link/67eb414e2d250e8408bb8afeb982deca2ff2b10b.174703730…
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 518be23e48ea9..ad64bb9ab5e25 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -3049,7 +3049,8 @@ static int validate_userspace(const struct nlattr *attr)
> struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
> int error;
>
> - error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
> + error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX,
> + nla_data(attr), nla_len(attr),
> userspace_policy, NULL);
> if (error)
> return error;
The patch titled
Subject: lib/alloc_tag: do not acquire non-existent lock in alloc_tag_top_users()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Harry Yoo <harry.yoo(a)oracle.com>
Subject: lib/alloc_tag: do not acquire non-existent lock in alloc_tag_top_users()
Date: Sat, 21 Jun 2025 04:53:05 +0900
alloc_tag_top_users() attempts to lock alloc_tag_cttype->mod_lock even
when the alloc_tag_cttype is not allocated because:
1) alloc tagging is disabled because mem profiling is disabled
(!alloc_tag_cttype)
2) alloc tagging is enabled, but not yet initialized (!alloc_tag_cttype)
3) alloc tagging is enabled, but failed initialization
(!alloc_tag_cttype or IS_ERR(alloc_tag_cttype))
In all cases, alloc_tag_cttype is not allocated, and therefore
alloc_tag_top_users() should not attempt to acquire the semaphore.
This leads to a crash on memory allocation failure by attempting to
acquire a non-existent semaphore:
Oops: general protection fault, probably for non-canonical address 0xdffffc000000001b: 0000 [#3] SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x00000000000000d8-0x00000000000000df]
CPU: 2 UID: 0 PID: 1 Comm: systemd Tainted: G D 6.16.0-rc2 #1 VOLUNTARY
Tainted: [D]=DIE
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:down_read_trylock+0xaa/0x3b0
Code: d0 7c 08 84 d2 0f 85 a0 02 00 00 8b 0d df 31 dd 04 85 c9 75 29 48 b8 00 00 00 00 00 fc ff df 48 8d 6b 68 48 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 88 02 00 00 48 3b 5b 68 0f 85 53 01 00 00 65 ff
RSP: 0000:ffff8881002ce9b8 EFLAGS: 00010016
RAX: dffffc0000000000 RBX: 0000000000000070 RCX: 0000000000000000
RDX: 000000000000001b RSI: 000000000000000a RDI: 0000000000000070
RBP: 00000000000000d8 R08: 0000000000000001 R09: ffffed107dde49d1
R10: ffff8883eef24e8b R11: ffff8881002cec20 R12: 1ffff11020059d37
R13: 00000000003fff7b R14: ffff8881002cec20 R15: dffffc0000000000
FS: 00007f963f21d940(0000) GS:ffff888458ca6000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f963f5edf71 CR3: 000000010672c000 CR4: 0000000000350ef0
Call Trace:
<TASK>
codetag_trylock_module_list+0xd/0x20
alloc_tag_top_users+0x369/0x4b0
__show_mem+0x1cd/0x6e0
warn_alloc+0x2b1/0x390
__alloc_frozen_pages_noprof+0x12b9/0x21a0
alloc_pages_mpol+0x135/0x3e0
alloc_slab_page+0x82/0xe0
new_slab+0x212/0x240
___slab_alloc+0x82a/0xe00
</TASK>
As David Wang points out, this issue became easier to trigger after commit
780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init").
Before the commit, the issue occurred only when it failed to allocate and
initialize alloc_tag_cttype or if a memory allocation fails before
alloc_tag_init() is called. After the commit, it can be easily triggered
when memory profiling is compiled but disabled at boot.
To properly determine whether alloc_tag_init() has been called and its
data structures initialized, verify that alloc_tag_cttype is a valid
pointer before acquiring the semaphore. If the variable is NULL or an
error value, it has not been properly initialized. In such a case, just
skip and do not attempt acquire the semaphore.
Link: https://lkml.kernel.org/r/20250620195305.1115151-1-harry.yoo@oracle.com
Fixes: 780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init")
Fixes: 1438d349d16b ("lib: add memory allocations report in show_mem()")
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202506181351.bba867dd-lkp@intel.com
Cc: Casey Chen <cachen(a)purestorage.com>
Cc: David Wang <00107082(a)163.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/alloc_tag.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/lib/alloc_tag.c~lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users
+++ a/lib/alloc_tag.c
@@ -135,7 +135,9 @@ size_t alloc_tag_top_users(struct codeta
struct codetag_bytes n;
unsigned int i, nr = 0;
- if (can_sleep)
+ if (IS_ERR_OR_NULL(alloc_tag_cttype))
+ return 0;
+ else if (can_sleep)
codetag_lock_module_list(alloc_tag_cttype, true);
else if (!codetag_trylock_module_list(alloc_tag_cttype))
return 0;
_
Patches currently in -mm which might be from harry.yoo(a)oracle.com are
lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch
lib-alloc_tag-do-not-acquire-nonexistent-lock-when-mem-profiling-is-disabled.patch
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062043-plunging-sculpture-7ca1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: [PATCH] mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh(a)google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1…
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..7ba020d489d4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7885,9 +7899,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7932,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7947,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7965,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..7ebc9eb608f4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062042-thrill-gyration-f247@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: [PATCH] mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh(a)google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1…
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..7ba020d489d4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7885,9 +7899,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7932,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7947,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7965,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..7ebc9eb608f4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062039-jet-eskimo-201a@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: [PATCH] mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh(a)google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1…
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..7ba020d489d4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7885,9 +7899,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7932,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7947,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7965,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..7ebc9eb608f4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 960013ec5b5e86c2c096e79ce6e08bce970650b3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062020-pumice-wad-3173@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 960013ec5b5e86c2c096e79ce6e08bce970650b3 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Wed, 7 May 2025 21:47:45 +0100
Subject: [PATCH] net: qede: Initialize qede_ll_ops with designated initializer
After a recent change [1] in clang's randstruct implementation to
randomize structures that only contain function pointers, there is an
error because qede_ll_ops get randomized but does not use a designated
initializer for the first member:
drivers/net/ethernet/qlogic/qede/qede_main.c:206:2: error: a randomized struct can only be initialized with a designated initializer
206 | {
| ^
Explicitly initialize the common member using a designated initializer
to fix the build.
Cc: stable(a)vger.kernel.org
Fixes: 035f7f87b729 ("randstruct: Enable Clang support")
Link: https://github.com/llvm/llvm-project/commit/04364fb888eea6db9811510607bed4b… [1]
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Reviewed-by: Kees Cook <kees(a)kernel.org>
Link: https://lore.kernel.org/r/20250507-qede-fix-clang-randstruct-v1-1-5ccc15626…
Signed-off-by: Kees Cook <kees(a)kernel.org>
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 99df00c30b8c..b5d744d2586f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -203,7 +203,7 @@ static struct pci_driver qede_pci_driver = {
};
static struct qed_eth_cb_ops qede_ll_ops = {
- {
+ .common = {
#ifdef CONFIG_RFS_ACCEL
.arfs_filter_op = qede_arfs_filter_op,
#endif
From: Kan Liang <kan.liang(a)linux.intel.com>
perf_fuzzzer reported an unchecked MSR access error.
[12646.001692] unchecked MSR access error: WRMSR to 0x3f1
(tried to write 0x0001000000000001) at
rIP: 0xffffffffa98932af (native_write_msr+0xf/0x20)
[12646.001698] Call Trace:
[12646.001700] <TASK>
[12646.001700] intel_pmu_pebs_enable_all+0x2c/0x40
[12646.001703] intel_pmu_enable_all+0xe/0x20
[12646.001705] ctx_resched+0x227/0x280
[12646.001708] event_function+0x8f/0xd0
Thank Vince very much for providing a small reproducible test case.
https://lore.kernel.org/lkml/d12d4300-9926-5e58-6515-a53cb5c7bee0@maine.edu/
The error is because perf mistakenly creates a precise Topdown perf
metrics event, INTEL_TD_METRIC_RETIRING, which uses the idx 48
internally.
The Topdown perf metrics events never be a precise event (PEBS). Any
illegal creation should be filtered out by the intel_pmu_hw_config.
However, the is_available_metric_event() failed to detect the Topdown
perf metrics event. The filter is not applied.
To detect an event, the pure event encoding should be used, rather than
the whole event->attr.config. Only check the pure event encoding in
is_available_metric_event.
Fixes: 1ab5f235c176 ("perf/x86/intel: Filter unsupported Topdown metrics event")
Reported-by: Vince Weaver <vincent.weaver(a)maine.edu>
Closes: https://lore.kernel.org/lkml/14d3167e-4dad-f68e-822f-21cd86eab873@maine.edu/
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/events/intel/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8f2e36ad89db..bf5ca4cb232b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4082,7 +4082,7 @@ static int core_pmu_hw_config(struct perf_event *event)
static bool is_available_metric_event(struct perf_event *event)
{
return is_metric_event(event) &&
- event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
+ (event->attr.config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_AVAILABLE_MAX;
}
static inline bool is_mem_loads_event(struct perf_event *event)
--
2.38.1
From: Ihor Solodrai <ihor.solodrai(a)pm.me>
Recently perf_link test started unreliably failing on libbpf CI:
* https://github.com/libbpf/libbpf/actions/runs/11260672407/job/31312405473
* https://github.com/libbpf/libbpf/actions/runs/11260992334/job/31315514626
* https://github.com/libbpf/libbpf/actions/runs/11263162459/job/31320458251
Part of the test is running a dummy loop for a while and then checking
for a counter incremented by the test program.
Instead of waiting for an arbitrary number of loop iterations once,
check for the test counter in a loop and use get_time_ns() helper to
enforce a 100ms timeout.
v1: https://lore.kernel.org/bpf/zuRd072x9tumn2iN4wDNs5av0nu5nekMNV4PkR-YwCT10eF…
Signed-off-by: Ihor Solodrai <ihor.solodrai(a)pm.me>
Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org>
Link: https://lore.kernel.org/bpf/20241011153104.249800-1-ihor.solodrai@pm.me
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
---
.../testing/selftests/bpf/prog_tests/perf_link.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index 3a25f1c743a1..d940ff87fa08 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -4,8 +4,12 @@
#include <pthread.h>
#include <sched.h>
#include <test_progs.h>
+#include "testing_helpers.h"
#include "test_perf_link.skel.h"
+#define BURN_TIMEOUT_MS 100
+#define BURN_TIMEOUT_NS BURN_TIMEOUT_MS * 1000000
+
static void burn_cpu(void)
{
volatile int j = 0;
@@ -32,6 +36,7 @@ void serial_test_perf_link(void)
int run_cnt_before, run_cnt_after;
struct bpf_link_info info;
__u32 info_len = sizeof(info);
+ __u64 timeout_time_ns;
/* create perf event */
memset(&attr, 0, sizeof(attr));
@@ -63,8 +68,14 @@ void serial_test_perf_link(void)
ASSERT_GT(info.prog_id, 0, "link_prog_id");
/* ensure we get at least one perf_event prog execution */
- burn_cpu();
- ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+ timeout_time_ns = get_time_ns() + BURN_TIMEOUT_NS;
+ while (true) {
+ burn_cpu();
+ if (skel->bss->run_cnt > 0)
+ break;
+ if (!ASSERT_LT(get_time_ns(), timeout_time_ns, "run_cnt_timeout"))
+ break;
+ }
/* perf_event is still active, but we close link and BPF program
* shouldn't be executed anymore
--
2.49.0
Hi stable team,
On 2025-06-19 22:26:18-0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> tools/nolibc: use intmax definitions from compiler
>
> to the 6.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> tools-nolibc-use-intmax-definitions-from-compiler.patch
> and it can be found in the queue-6.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
As discussed in [0] and its ancestors, please only backport nolibc
patches are were explicitly tagged for stable@.
[0] https://lore.kernel.org/lkml/2025061907-finance-dodgy-b0ae@gregkh/
Thanks,
Thomas
> commit fb60b190f25c7958f85fdae9b7024548139de281
> Author: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
> Date: Fri Apr 11 11:00:39 2025 +0200
>
> tools/nolibc: use intmax definitions from compiler
>
> [ Upstream commit e5407c0820ea5fa7117b85ed32b724af73156d63 ]
>
> The printf format checking in the compiler uses the intmax types from
> the compiler, not libc. This can lead to compiler errors.
>
> Instead use the types already provided by the compiler.
>
> Example issue with clang 19 for arm64:
>
> nolibc-test.c:30:2: error: format specifies type 'uintmax_t' (aka 'unsigned long') but the argument has type 'uintmax_t' (aka 'unsigned long long') [-Werror,-Wformat]
>
> Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
> Acked-by: Willy Tarreau <w(a)1wt.eu>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
> index cd79ddd6170e0..b052ad6303c38 100644
> --- a/tools/include/nolibc/stdint.h
> +++ b/tools/include/nolibc/stdint.h
> @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t;
> typedef int64_t int_fast64_t;
> typedef uint64_t uint_fast64_t;
>
> -typedef int64_t intmax_t;
> -typedef uint64_t uintmax_t;
> +typedef __INTMAX_TYPE__ intmax_t;
> +typedef __UINTMAX_TYPE__ uintmax_t;
>
> /* limits of integral types */
>
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7ca52541c05c832d32b112274f81a985101f9ba8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062025-unengaged-regroup-c3c7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Wed, 11 Jun 2025 08:35:01 +0000
Subject: [PATCH] net_sched: sch_sfq: reject invalid perturb period
Gerrard Tai reported that SFQ perturb_period has no range check yet,
and this can be used to trigger a race condition fixed in a separate patch.
We want to make sure ctl->perturb_period * HZ will not overflow
and is positive.
Tested:
tc qd add dev lo root sfq perturb -10 # negative value : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 1000000000 # too big : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 2000000 # acceptable value
tc -s -d qd sh dev lo
qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Gerrard Tai <gerrard.tai(a)starlabs.sg>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 77fa02f2bfcd..a8cca549b5a2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
Hey,
Can we please get
4570355f8eaa476164cfb7ca959fdbf0cebbc9eb
Author: Zhi Wang <zhiw(a)nvidia.com>
Date: Thu Feb 27 01:35:53 2025 +0000
drm/nouveau/nvkm: factor out current GSP RPC command policies
a738fa9105ac2897701ba4067c33e85faa27d1e2
Author: Zhi Wang <zhiw(a)nvidia.com>
Date: Thu Feb 27 01:35:54 2025 +0000
drm/nouveau/nvkm: introduce new GSP reply policy NVKM_GSP_RPC_REPLY_POLL
Into 6.15 stable they fix a major regression in suspend/resume on nouveau.
Thanks,
Dave.
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x c50784e99f0e7199cdb12dbddf02229b102744ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062042-handcraft-chitchat-8110@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c50784e99f0e7199cdb12dbddf02229b102744ef Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Fri, 13 Jun 2025 13:23:07 -1000
Subject: [PATCH] sched_ext: Make scx_group_set_weight() always update
tg->scx.weight
Otherwise, tg->scx.weight can go out of sync while scx_cgroup is not enabled
and ops.cgroup_init() may be called with a stale weight value.
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Fixes: 819513666966 ("sched_ext: Add cgroup support")
Cc: stable(a)vger.kernel.org # v6.12+
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2c41c78be61e..33a0d8c6ff95 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4241,12 +4241,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
percpu_down_read(&scx_cgroup_rwsem);
- if (scx_cgroup_enabled && tg->scx_weight != weight) {
- if (SCX_HAS_OP(sch, cgroup_set_weight))
- SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
- tg_cgrp(tg), weight);
- tg->scx_weight = weight;
- }
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
+ tg->scx_weight != weight)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
+ tg_cgrp(tg), weight);
+
+ tg->scx_weight = weight;
percpu_up_read(&scx_cgroup_rwsem);
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x c50784e99f0e7199cdb12dbddf02229b102744ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062042-retention-refocus-df92@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c50784e99f0e7199cdb12dbddf02229b102744ef Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Fri, 13 Jun 2025 13:23:07 -1000
Subject: [PATCH] sched_ext: Make scx_group_set_weight() always update
tg->scx.weight
Otherwise, tg->scx.weight can go out of sync while scx_cgroup is not enabled
and ops.cgroup_init() may be called with a stale weight value.
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Fixes: 819513666966 ("sched_ext: Add cgroup support")
Cc: stable(a)vger.kernel.org # v6.12+
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2c41c78be61e..33a0d8c6ff95 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4241,12 +4241,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
percpu_down_read(&scx_cgroup_rwsem);
- if (scx_cgroup_enabled && tg->scx_weight != weight) {
- if (SCX_HAS_OP(sch, cgroup_set_weight))
- SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
- tg_cgrp(tg), weight);
- tg->scx_weight = weight;
- }
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
+ tg->scx_weight != weight)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
+ tg_cgrp(tg), weight);
+
+ tg->scx_weight = weight;
percpu_up_read(&scx_cgroup_rwsem);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5808c34216954cd832bd4b8bc52dfa287049122b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062037-vocation-stylus-d86b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001
From: Rong Zhang <i(a)rong.moe>
Date: Mon, 26 May 2025 04:18:07 +0800
Subject: [PATCH] platform/x86: ideapad-laptop: use usleep_range() for EC
polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It was reported that ideapad-laptop sometimes causes some recent (since
2024) Lenovo ThinkBook models shut down when:
- suspending/resuming
- closing/opening the lid
- (dis)connecting a charger
- reading/writing some sysfs properties, e.g., fan_mode, touchpad
- pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6)
- (seldom) loading the kmod
The issue has existed since the launch day of such models, and there
have been some out-of-tree workarounds (see Link:) for the issue. One
disables some functionalities, while another one simply shortens
IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in
their call chains, which calls schedule() between each poll.
It turns out that these models suffer from the indeterminacy of
schedule() because of their low tolerance for being polled too
frequently. Sometimes schedule() returns too soon due to the lack of
ready tasks, causing the margin between two polls to be too short.
In this case, the command is somehow aborted, and too many subsequent
polls (they poll for "nothing!") may eventually break the state machine
in the EC, resulting in a hard shutdown. This explains why shortening
IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number
of polls sent to the EC.
Even when it doesn't lead to a shutdown, frequent polls may also disturb
the ongoing operation and notably delay (+ 10-20ms) the availability of
EC response. This phenomenon is unlikely to be exclusive to the models
mentioned above, so dropping the schedule() manner should also slightly
improve the responsiveness of various models.
Fix these issues by migrating to usleep_range(150, 300). The interval is
chosen to add some margin to the minimal 50us and considering EC
responses are usually available after 150-2500us based on my test. It
should be enough to fix these issues on all models subject to the EC bug
without introducing latency on other models.
Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was
introduced in the test on a model without the EC bug (ThinkBook X IMH,
thanks Eric).
Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873…
Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f…
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771
Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers")
Cc: stable(a)vger.kernel.org
Tested-by: Felix Yan <felixonmars(a)archlinux.org>
Tested-by: Eric Long <i(a)hack3r.moe>
Tested-by: Jianfei Zhang <zhangjianfei3(a)gmail.com>
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Minh Le <minhld139(a)gmail.com>
Tested-by: Sicheng Zhu <Emmet_Z(a)outlook.com>
Signed-off-by: Rong Zhang <i(a)rong.moe>
Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..b5e4da6a6779 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5808c34216954cd832bd4b8bc52dfa287049122b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062036-chihuahua-antidote-eca5@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001
From: Rong Zhang <i(a)rong.moe>
Date: Mon, 26 May 2025 04:18:07 +0800
Subject: [PATCH] platform/x86: ideapad-laptop: use usleep_range() for EC
polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It was reported that ideapad-laptop sometimes causes some recent (since
2024) Lenovo ThinkBook models shut down when:
- suspending/resuming
- closing/opening the lid
- (dis)connecting a charger
- reading/writing some sysfs properties, e.g., fan_mode, touchpad
- pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6)
- (seldom) loading the kmod
The issue has existed since the launch day of such models, and there
have been some out-of-tree workarounds (see Link:) for the issue. One
disables some functionalities, while another one simply shortens
IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in
their call chains, which calls schedule() between each poll.
It turns out that these models suffer from the indeterminacy of
schedule() because of their low tolerance for being polled too
frequently. Sometimes schedule() returns too soon due to the lack of
ready tasks, causing the margin between two polls to be too short.
In this case, the command is somehow aborted, and too many subsequent
polls (they poll for "nothing!") may eventually break the state machine
in the EC, resulting in a hard shutdown. This explains why shortening
IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number
of polls sent to the EC.
Even when it doesn't lead to a shutdown, frequent polls may also disturb
the ongoing operation and notably delay (+ 10-20ms) the availability of
EC response. This phenomenon is unlikely to be exclusive to the models
mentioned above, so dropping the schedule() manner should also slightly
improve the responsiveness of various models.
Fix these issues by migrating to usleep_range(150, 300). The interval is
chosen to add some margin to the minimal 50us and considering EC
responses are usually available after 150-2500us based on my test. It
should be enough to fix these issues on all models subject to the EC bug
without introducing latency on other models.
Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was
introduced in the test on a model without the EC bug (ThinkBook X IMH,
thanks Eric).
Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873…
Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f…
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771
Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers")
Cc: stable(a)vger.kernel.org
Tested-by: Felix Yan <felixonmars(a)archlinux.org>
Tested-by: Eric Long <i(a)hack3r.moe>
Tested-by: Jianfei Zhang <zhangjianfei3(a)gmail.com>
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Minh Le <minhld139(a)gmail.com>
Tested-by: Sicheng Zhu <Emmet_Z(a)outlook.com>
Signed-off-by: Rong Zhang <i(a)rong.moe>
Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..b5e4da6a6779 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5808c34216954cd832bd4b8bc52dfa287049122b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062035-flattery-salvation-30e9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001
From: Rong Zhang <i(a)rong.moe>
Date: Mon, 26 May 2025 04:18:07 +0800
Subject: [PATCH] platform/x86: ideapad-laptop: use usleep_range() for EC
polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It was reported that ideapad-laptop sometimes causes some recent (since
2024) Lenovo ThinkBook models shut down when:
- suspending/resuming
- closing/opening the lid
- (dis)connecting a charger
- reading/writing some sysfs properties, e.g., fan_mode, touchpad
- pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6)
- (seldom) loading the kmod
The issue has existed since the launch day of such models, and there
have been some out-of-tree workarounds (see Link:) for the issue. One
disables some functionalities, while another one simply shortens
IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in
their call chains, which calls schedule() between each poll.
It turns out that these models suffer from the indeterminacy of
schedule() because of their low tolerance for being polled too
frequently. Sometimes schedule() returns too soon due to the lack of
ready tasks, causing the margin between two polls to be too short.
In this case, the command is somehow aborted, and too many subsequent
polls (they poll for "nothing!") may eventually break the state machine
in the EC, resulting in a hard shutdown. This explains why shortening
IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number
of polls sent to the EC.
Even when it doesn't lead to a shutdown, frequent polls may also disturb
the ongoing operation and notably delay (+ 10-20ms) the availability of
EC response. This phenomenon is unlikely to be exclusive to the models
mentioned above, so dropping the schedule() manner should also slightly
improve the responsiveness of various models.
Fix these issues by migrating to usleep_range(150, 300). The interval is
chosen to add some margin to the minimal 50us and considering EC
responses are usually available after 150-2500us based on my test. It
should be enough to fix these issues on all models subject to the EC bug
without introducing latency on other models.
Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was
introduced in the test on a model without the EC bug (ThinkBook X IMH,
thanks Eric).
Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873…
Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f…
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771
Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers")
Cc: stable(a)vger.kernel.org
Tested-by: Felix Yan <felixonmars(a)archlinux.org>
Tested-by: Eric Long <i(a)hack3r.moe>
Tested-by: Jianfei Zhang <zhangjianfei3(a)gmail.com>
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Minh Le <minhld139(a)gmail.com>
Tested-by: Sicheng Zhu <Emmet_Z(a)outlook.com>
Signed-off-by: Rong Zhang <i(a)rong.moe>
Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..b5e4da6a6779 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5808c34216954cd832bd4b8bc52dfa287049122b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062034-morbidity-saddling-e68c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001
From: Rong Zhang <i(a)rong.moe>
Date: Mon, 26 May 2025 04:18:07 +0800
Subject: [PATCH] platform/x86: ideapad-laptop: use usleep_range() for EC
polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It was reported that ideapad-laptop sometimes causes some recent (since
2024) Lenovo ThinkBook models shut down when:
- suspending/resuming
- closing/opening the lid
- (dis)connecting a charger
- reading/writing some sysfs properties, e.g., fan_mode, touchpad
- pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6)
- (seldom) loading the kmod
The issue has existed since the launch day of such models, and there
have been some out-of-tree workarounds (see Link:) for the issue. One
disables some functionalities, while another one simply shortens
IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in
their call chains, which calls schedule() between each poll.
It turns out that these models suffer from the indeterminacy of
schedule() because of their low tolerance for being polled too
frequently. Sometimes schedule() returns too soon due to the lack of
ready tasks, causing the margin between two polls to be too short.
In this case, the command is somehow aborted, and too many subsequent
polls (they poll for "nothing!") may eventually break the state machine
in the EC, resulting in a hard shutdown. This explains why shortening
IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number
of polls sent to the EC.
Even when it doesn't lead to a shutdown, frequent polls may also disturb
the ongoing operation and notably delay (+ 10-20ms) the availability of
EC response. This phenomenon is unlikely to be exclusive to the models
mentioned above, so dropping the schedule() manner should also slightly
improve the responsiveness of various models.
Fix these issues by migrating to usleep_range(150, 300). The interval is
chosen to add some margin to the minimal 50us and considering EC
responses are usually available after 150-2500us based on my test. It
should be enough to fix these issues on all models subject to the EC bug
without introducing latency on other models.
Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was
introduced in the test on a model without the EC bug (ThinkBook X IMH,
thanks Eric).
Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873…
Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f…
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771
Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers")
Cc: stable(a)vger.kernel.org
Tested-by: Felix Yan <felixonmars(a)archlinux.org>
Tested-by: Eric Long <i(a)hack3r.moe>
Tested-by: Jianfei Zhang <zhangjianfei3(a)gmail.com>
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Minh Le <minhld139(a)gmail.com>
Tested-by: Sicheng Zhu <Emmet_Z(a)outlook.com>
Signed-off-by: Rong Zhang <i(a)rong.moe>
Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..b5e4da6a6779 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 5808c34216954cd832bd4b8bc52dfa287049122b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062033-fedora-humbly-2cc5@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001
From: Rong Zhang <i(a)rong.moe>
Date: Mon, 26 May 2025 04:18:07 +0800
Subject: [PATCH] platform/x86: ideapad-laptop: use usleep_range() for EC
polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It was reported that ideapad-laptop sometimes causes some recent (since
2024) Lenovo ThinkBook models shut down when:
- suspending/resuming
- closing/opening the lid
- (dis)connecting a charger
- reading/writing some sysfs properties, e.g., fan_mode, touchpad
- pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6)
- (seldom) loading the kmod
The issue has existed since the launch day of such models, and there
have been some out-of-tree workarounds (see Link:) for the issue. One
disables some functionalities, while another one simply shortens
IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in
their call chains, which calls schedule() between each poll.
It turns out that these models suffer from the indeterminacy of
schedule() because of their low tolerance for being polled too
frequently. Sometimes schedule() returns too soon due to the lack of
ready tasks, causing the margin between two polls to be too short.
In this case, the command is somehow aborted, and too many subsequent
polls (they poll for "nothing!") may eventually break the state machine
in the EC, resulting in a hard shutdown. This explains why shortening
IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number
of polls sent to the EC.
Even when it doesn't lead to a shutdown, frequent polls may also disturb
the ongoing operation and notably delay (+ 10-20ms) the availability of
EC response. This phenomenon is unlikely to be exclusive to the models
mentioned above, so dropping the schedule() manner should also slightly
improve the responsiveness of various models.
Fix these issues by migrating to usleep_range(150, 300). The interval is
chosen to add some margin to the minimal 50us and considering EC
responses are usually available after 150-2500us based on my test. It
should be enough to fix these issues on all models subject to the EC bug
without introducing latency on other models.
Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was
introduced in the test on a model without the EC bug (ThinkBook X IMH,
thanks Eric).
Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873…
Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f…
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771
Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers")
Cc: stable(a)vger.kernel.org
Tested-by: Felix Yan <felixonmars(a)archlinux.org>
Tested-by: Eric Long <i(a)hack3r.moe>
Tested-by: Jianfei Zhang <zhangjianfei3(a)gmail.com>
Tested-by: Mingcong Bai <jeffbai(a)aosc.io>
Tested-by: Minh Le <minhld139(a)gmail.com>
Tested-by: Sicheng Zhu <Emmet_Z(a)outlook.com>
Signed-off-by: Rong Zhang <i(a)rong.moe>
Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..b5e4da6a6779 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 383c4613c67c26e90e8eebb72e3083457d02033f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062029-saturday-conical-0eae@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Fri, 6 Jun 2025 10:28:07 +0100
Subject: [PATCH] mm: close theoretical race where stale TLB entries could
linger
Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a
parallel reclaim leaving stale TLB entries") described a theoretical race
as such:
"""
Nadav Amit identified a theoretical race between page reclaim and mprotect
due to TLB flushes being batched outside of the PTL being held.
He described the race as follows:
CPU0 CPU1
---- ----
user accesses memory using RW PTE
[PTE now cached in TLB]
try_to_unmap_one()
==> ptep_get_and_clear()
==> set_tlb_ubc_flush_pending()
mprotect(addr, PROT_READ)
==> change_pte_range()
==> [ PTE non-present - no flush ]
user writes using cached RW PTE
...
try_to_unmap_flush()
The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such as
munmap, mremap and madvise.
"""
The solution was to introduce flush_tlb_batched_pending() and call it
under the PTL from mprotect/madvise/munmap/mremap to complete any pending
tlb flushes.
However, while madvise_free_pte_range() and
madvise_cold_or_pageout_pte_range() were both retro-fitted to call
flush_tlb_batched_pending() immediately after initially acquiring the PTL,
they both temporarily release the PTL to split a large folio if they
stumble upon one. In this case, where re-acquiring the PTL
flush_tlb_batched_pending() must be called again, but it previously was
not. Let's fix that.
There are 2 Fixes: tags here: the first is the commit that fixed
madvise_free_pte_range(). The second is the commit that added
madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the
faulty pattern from madvise_free_pte_range().
This is a theoretical bug discovered during code review.
Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com
Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries")
Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mel Gorman <mgorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 383c4613c67c26e90e8eebb72e3083457d02033f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062028-richly-basket-a70f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Fri, 6 Jun 2025 10:28:07 +0100
Subject: [PATCH] mm: close theoretical race where stale TLB entries could
linger
Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a
parallel reclaim leaving stale TLB entries") described a theoretical race
as such:
"""
Nadav Amit identified a theoretical race between page reclaim and mprotect
due to TLB flushes being batched outside of the PTL being held.
He described the race as follows:
CPU0 CPU1
---- ----
user accesses memory using RW PTE
[PTE now cached in TLB]
try_to_unmap_one()
==> ptep_get_and_clear()
==> set_tlb_ubc_flush_pending()
mprotect(addr, PROT_READ)
==> change_pte_range()
==> [ PTE non-present - no flush ]
user writes using cached RW PTE
...
try_to_unmap_flush()
The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such as
munmap, mremap and madvise.
"""
The solution was to introduce flush_tlb_batched_pending() and call it
under the PTL from mprotect/madvise/munmap/mremap to complete any pending
tlb flushes.
However, while madvise_free_pte_range() and
madvise_cold_or_pageout_pte_range() were both retro-fitted to call
flush_tlb_batched_pending() immediately after initially acquiring the PTL,
they both temporarily release the PTL to split a large folio if they
stumble upon one. In this case, where re-acquiring the PTL
flush_tlb_batched_pending() must be called again, but it previously was
not. Let's fix that.
There are 2 Fixes: tags here: the first is the commit that fixed
madvise_free_pte_range(). The second is the commit that added
madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the
faulty pattern from madvise_free_pte_range().
This is a theoretical bug discovered during code review.
Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com
Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries")
Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mel Gorman <mgorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 383c4613c67c26e90e8eebb72e3083457d02033f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062027-tapestry-uptake-5f29@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Fri, 6 Jun 2025 10:28:07 +0100
Subject: [PATCH] mm: close theoretical race where stale TLB entries could
linger
Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a
parallel reclaim leaving stale TLB entries") described a theoretical race
as such:
"""
Nadav Amit identified a theoretical race between page reclaim and mprotect
due to TLB flushes being batched outside of the PTL being held.
He described the race as follows:
CPU0 CPU1
---- ----
user accesses memory using RW PTE
[PTE now cached in TLB]
try_to_unmap_one()
==> ptep_get_and_clear()
==> set_tlb_ubc_flush_pending()
mprotect(addr, PROT_READ)
==> change_pte_range()
==> [ PTE non-present - no flush ]
user writes using cached RW PTE
...
try_to_unmap_flush()
The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such as
munmap, mremap and madvise.
"""
The solution was to introduce flush_tlb_batched_pending() and call it
under the PTL from mprotect/madvise/munmap/mremap to complete any pending
tlb flushes.
However, while madvise_free_pte_range() and
madvise_cold_or_pageout_pte_range() were both retro-fitted to call
flush_tlb_batched_pending() immediately after initially acquiring the PTL,
they both temporarily release the PTL to split a large folio if they
stumble upon one. In this case, where re-acquiring the PTL
flush_tlb_batched_pending() must be called again, but it previously was
not. Let's fix that.
There are 2 Fixes: tags here: the first is the commit that fixed
madvise_free_pte_range(). The second is the commit that added
madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the
faulty pattern from madvise_free_pte_range().
This is a theoretical bug discovered during code review.
Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com
Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries")
Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mel Gorman <mgorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 383c4613c67c26e90e8eebb72e3083457d02033f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062026-shrewdly-trough-b30e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Fri, 6 Jun 2025 10:28:07 +0100
Subject: [PATCH] mm: close theoretical race where stale TLB entries could
linger
Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a
parallel reclaim leaving stale TLB entries") described a theoretical race
as such:
"""
Nadav Amit identified a theoretical race between page reclaim and mprotect
due to TLB flushes being batched outside of the PTL being held.
He described the race as follows:
CPU0 CPU1
---- ----
user accesses memory using RW PTE
[PTE now cached in TLB]
try_to_unmap_one()
==> ptep_get_and_clear()
==> set_tlb_ubc_flush_pending()
mprotect(addr, PROT_READ)
==> change_pte_range()
==> [ PTE non-present - no flush ]
user writes using cached RW PTE
...
try_to_unmap_flush()
The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such as
munmap, mremap and madvise.
"""
The solution was to introduce flush_tlb_batched_pending() and call it
under the PTL from mprotect/madvise/munmap/mremap to complete any pending
tlb flushes.
However, while madvise_free_pte_range() and
madvise_cold_or_pageout_pte_range() were both retro-fitted to call
flush_tlb_batched_pending() immediately after initially acquiring the PTL,
they both temporarily release the PTL to split a large folio if they
stumble upon one. In this case, where re-acquiring the PTL
flush_tlb_batched_pending() must be called again, but it previously was
not. Let's fix that.
There are 2 Fixes: tags here: the first is the commit that fixed
madvise_free_pte_range(). The second is the commit that added
madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the
faulty pattern from madvise_free_pte_range().
This is a theoretical bug discovered during code review.
Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com
Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries")
Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mel Gorman <mgorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 383c4613c67c26e90e8eebb72e3083457d02033f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062025-boogieman-flyable-ee70@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts(a)arm.com>
Date: Fri, 6 Jun 2025 10:28:07 +0100
Subject: [PATCH] mm: close theoretical race where stale TLB entries could
linger
Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a
parallel reclaim leaving stale TLB entries") described a theoretical race
as such:
"""
Nadav Amit identified a theoretical race between page reclaim and mprotect
due to TLB flushes being batched outside of the PTL being held.
He described the race as follows:
CPU0 CPU1
---- ----
user accesses memory using RW PTE
[PTE now cached in TLB]
try_to_unmap_one()
==> ptep_get_and_clear()
==> set_tlb_ubc_flush_pending()
mprotect(addr, PROT_READ)
==> change_pte_range()
==> [ PTE non-present - no flush ]
user writes using cached RW PTE
...
try_to_unmap_flush()
The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such as
munmap, mremap and madvise.
"""
The solution was to introduce flush_tlb_batched_pending() and call it
under the PTL from mprotect/madvise/munmap/mremap to complete any pending
tlb flushes.
However, while madvise_free_pte_range() and
madvise_cold_or_pageout_pte_range() were both retro-fitted to call
flush_tlb_batched_pending() immediately after initially acquiring the PTL,
they both temporarily release the PTL to split a large folio if they
stumble upon one. In this case, where re-acquiring the PTL
flush_tlb_batched_pending() must be called again, but it previously was
not. Let's fix that.
There are 2 Fixes: tags here: the first is the commit that fixed
madvise_free_pte_range(). The second is the commit that added
madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the
faulty pattern from madvise_free_pte_range().
This is a theoretical bug discovered during code review.
Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com
Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries")
Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mel Gorman <mgorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7ca52541c05c832d32b112274f81a985101f9ba8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062026-stinger-coeditor-bb58@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Wed, 11 Jun 2025 08:35:01 +0000
Subject: [PATCH] net_sched: sch_sfq: reject invalid perturb period
Gerrard Tai reported that SFQ perturb_period has no range check yet,
and this can be used to trigger a race condition fixed in a separate patch.
We want to make sure ctl->perturb_period * HZ will not overflow
and is positive.
Tested:
tc qd add dev lo root sfq perturb -10 # negative value : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 1000000000 # too big : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 2000000 # acceptable value
tc -s -d qd sh dev lo
qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Gerrard Tai <gerrard.tai(a)starlabs.sg>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 77fa02f2bfcd..a8cca549b5a2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7ca52541c05c832d32b112274f81a985101f9ba8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062026-excitable-trunks-92e6@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Wed, 11 Jun 2025 08:35:01 +0000
Subject: [PATCH] net_sched: sch_sfq: reject invalid perturb period
Gerrard Tai reported that SFQ perturb_period has no range check yet,
and this can be used to trigger a race condition fixed in a separate patch.
We want to make sure ctl->perturb_period * HZ will not overflow
and is positive.
Tested:
tc qd add dev lo root sfq perturb -10 # negative value : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 1000000000 # too big : error
Error: sch_sfq: invalid perturb period.
tc qd add dev lo root sfq perturb 2000000 # acceptable value
tc -s -d qd sh dev lo
qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Gerrard Tai <gerrard.tai(a)starlabs.sg>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 77fa02f2bfcd..a8cca549b5a2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 66d590b828b1fd9fa337047ae58fe1c4c6f43609
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062054-luminance-hacker-c550@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 66d590b828b1fd9fa337047ae58fe1c4c6f43609 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Mon, 2 Jun 2025 22:37:12 +0530
Subject: [PATCH] cifs: deal with the channel loading lag while picking
channels
Our current approach to select a channel for sending requests is this:
1. iterate all channels to find the min and max queue depth
2. if min and max are not the same, pick the channel with min depth
3. if min and max are same, round robin, as all channels are equally loaded
The problem with this approach is that there's a lag between selecting
a channel and sending the request (that increases the queue depth on the channel).
While these numbers will eventually catch up, there could be a skew in the
channel usage, depending on the application's I/O parallelism and the server's
speed of handling requests.
With sufficient parallelism, this lag can artificially increase the queue depth,
thereby impacting the performance negatively.
This change will change the step 1 above to start the iteration from the last
selected channel. This is to reduce the skew in channel usage even in the presence
of this lag.
Fixes: ea90708d3cf3 ("cifs: use the least loaded channel for sending requests")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 266af17aa7d9..191783f553ce 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
uint index = 0;
unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
struct TCP_Server_Info *server = NULL;
- int i;
+ int i, start, cur;
if (!ses)
return NULL;
spin_lock(&ses->chan_lock);
+ start = atomic_inc_return(&ses->chan_seq);
for (i = 0; i < ses->chan_count; i++) {
- server = ses->chans[i].server;
+ cur = (start + i) % ses->chan_count;
+ server = ses->chans[cur].server;
if (!server || server->terminate)
continue;
@@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
*/
if (server->in_flight < min_in_flight) {
min_in_flight = server->in_flight;
- index = i;
+ index = cur;
}
if (server->in_flight > max_in_flight)
max_in_flight = server->in_flight;
}
/* if all channels are equally loaded, fall back to round-robin */
- if (min_in_flight == max_in_flight) {
- index = (uint)atomic_inc_return(&ses->chan_seq);
- index %= ses->chan_count;
- }
+ if (min_in_flight == max_in_flight)
+ index = (uint)start % ses->chan_count;
server = ses->chans[index].server;
spin_unlock(&ses->chan_lock);
Hi,
I have discovered that two small form factor desktops with Ryzen AI 7
350 and Ryzen AI 5 340 crash when woken up from suspend. I can see how
the LED on the USB mouse is switched on when I trigger a resume via
keyboard button, but the display remains black. The kernel also no
longer responds to Magic SysRq keys in this state.
The problem affects all kernels after merge b50753547453 (v6.11.0). But
this merge only adds PCI_DEVICE_ID_AMD_1AH_M60H_ROOT with commit
59c34008d (necessary to trigger this bug with Ryzen AI CPU).
I cherry-picked this commit and continued searching. Which finally led
me to commit f6098641d3e - drm/amd/display: fix s2idle entry for DCN3.5+
If I remove the code, which has changed somewhat in the meantime, then
the suspend works without any problems. See the following patch.
Regards,
Georg
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d3100f641ac6..76204ae70acc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3121,9 +3121,6 @@ static int dm_suspend(struct amdgpu_ip_block
*ip_block)
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
- if (dm->dc->caps.ips_support && adev->in_s0ix)
- dc_allow_idle_optimizations(dm->dc, true);
-
dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv,
DC_ACPI_CM_POWER_STATE_D3);
return 0;
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 595cf683519ab5a277d258a2251ee8cc7b838d6d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062039-policy-handheld-01c6@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 595cf683519ab5a277d258a2251ee8cc7b838d6d Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg(a)amd.com>
Date: Mon, 26 May 2025 18:28:18 +0000
Subject: [PATCH] mm/khugepaged: fix race with folio split/free using temporary
reference
hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn
calls folio_mapcount(). folio_mapcount() checks folio_test_large() before
proceeding to folio_large_mapcount(), but there is a race window where the
folio may get split/freed between these checks, triggering:
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio)
Take a temporary reference to the folio in hpage_collapse_scan_file().
This stabilizes the folio during refcount check and prevents incorrect
large folio detection due to concurrent split/free. Use helper
folio_expected_ref_count() + 1 to compare with folio_ref_count() instead
of using is_refcount_suitable().
Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com
Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value")
Signed-off-by: Shivank Garg <shivankg(a)amd.com>
Reported-by: syzbot+2b99589e33edbe9475ca(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com
Suggested-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Bharata B Rao <bharata(a)amd.com>
Cc: Fengwei Yin <fengwei.yin(a)intel.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mariano Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index cdf5a581368b..7731a162a1a7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2293,6 +2293,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
continue;
}
+ if (!folio_try_get(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ xas_reset(&xas);
+ continue;
+ }
+
if (folio_order(folio) == HPAGE_PMD_ORDER &&
folio->index == start) {
/* Maybe PMD-mapped */
@@ -2303,23 +2314,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
* it's safe to skip LRU and refcount checks before
* returning.
*/
+ folio_put(folio);
break;
}
node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
+ folio_put(folio);
break;
}
cc->node_load[node]++;
if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
+ folio_put(folio);
break;
}
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
+ folio_put(folio);
break;
}
@@ -2331,6 +2346,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
*/
present += folio_nr_pages(folio);
+ folio_put(folio);
if (need_resched()) {
xas_pause(&xas);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1013af4f585fccc4d3e5c5824d174de2257f7d6d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062056-decree-conduit-b901@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:54 +0200
Subject: [PATCH] mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
huge_pmd_unshare() drops a reference on a page table that may have
previously been shared across processes, potentially turning it into a
normal page table used in another process in which unrelated VMAs can
afterwards be installed.
If this happens in the middle of a concurrent gup_fast(), gup_fast() could
end up walking the page tables of another process. While I don't see any
way in which that immediately leads to kernel memory corruption, it is
really weird and unexpected.
Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
just like we do in khugepaged when removing page tables for a THP
collapse.
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7ba020d489d4..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7629,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1013af4f585fccc4d3e5c5824d174de2257f7d6d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062055-pettiness-snowshoe-2310@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:54 +0200
Subject: [PATCH] mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
huge_pmd_unshare() drops a reference on a page table that may have
previously been shared across processes, potentially turning it into a
normal page table used in another process in which unrelated VMAs can
afterwards be installed.
If this happens in the middle of a concurrent gup_fast(), gup_fast() could
end up walking the page tables of another process. While I don't see any
way in which that immediately leads to kernel memory corruption, it is
really weird and unexpected.
Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
just like we do in khugepaged when removing page tables for a THP
collapse.
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7ba020d489d4..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7629,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1013af4f585fccc4d3e5c5824d174de2257f7d6d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062055-curator-nest-bade@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:54 +0200
Subject: [PATCH] mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
huge_pmd_unshare() drops a reference on a page table that may have
previously been shared across processes, potentially turning it into a
normal page table used in another process in which unrelated VMAs can
afterwards be installed.
If this happens in the middle of a concurrent gup_fast(), gup_fast() could
end up walking the page tables of another process. While I don't see any
way in which that immediately leads to kernel memory corruption, it is
really weird and unexpected.
Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
just like we do in khugepaged when removing page tables for a THP
collapse.
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7ba020d489d4..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7629,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1013af4f585fccc4d3e5c5824d174de2257f7d6d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062054-snowdrop-stitch-3abf@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1013af4f585fccc4d3e5c5824d174de2257f7d6d Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:54 +0200
Subject: [PATCH] mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
huge_pmd_unshare() drops a reference on a page table that may have
previously been shared across processes, potentially turning it into a
normal page table used in another process in which unrelated VMAs can
afterwards be installed.
If this happens in the middle of a concurrent gup_fast(), gup_fast() could
end up walking the page tables of another process. While I don't see any
way in which that immediately leads to kernel memory corruption, it is
really weird and unexpected.
Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
just like we do in khugepaged when removing page tables for a THP
collapse.
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7ba020d489d4..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7629,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025062044-sporty-zesty-9142@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: [PATCH] mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh(a)google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1…
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1…
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58…
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..7ba020d489d4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7885,9 +7899,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7932,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7947,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7965,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..7ebc9eb608f4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -539,7 +539,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);