When dwc3_gadget_soft_disconnect() fails, dwc3_suspend_common() keeps
going with the suspend, resulting in a period where the power domain is
off, but the gadget driver remains connected. Within this time frame,
invoking vbus_event_work() will cause an error as it attempts to access
DWC3 registers for endpoint disabling after the power domain has been
completely shut down.
Abort the suspend sequence when dwc3_gadget_suspend() cannot halt the
controller and proceeds with a soft connect.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
CC: stable(a)vger.kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
---
Kernel panic - not syncing: Asynchronous SError Interrupt
Workqueue: events vbus_event_work
Call trace:
dump_backtrace+0xf4/0x118
show_stack+0x18/0x24
dump_stack_lvl+0x60/0x7c
dump_stack+0x18/0x3c
panic+0x16c/0x390
nmi_panic+0xa4/0xa8
arm64_serror_panic+0x6c/0x94
do_serror+0xc4/0xd0
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
readl+0x4c/0x8c
__dwc3_gadget_ep_disable+0x48/0x230
dwc3_gadget_ep_disable+0x50/0xc0
usb_ep_disable+0x44/0xe4
ffs_func_eps_disable+0x64/0xc8
ffs_func_set_alt+0x74/0x368
ffs_func_disable+0x18/0x28
composite_disconnect+0x90/0xec
configfs_composite_disconnect+0x64/0x88
usb_gadget_disconnect_locked+0xc0/0x168
vbus_event_work+0x3c/0x58
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20
---
Changelog:
v4:
- correct the mistake where semicolon was forgotten
- return -EAGAIN upon dwc3_gadget_suspend() failure
v3:
- change the Fixes tag
v2:
- move declarations in separate lines
- add the Fixes tag
---
drivers/usb/dwc3/core.c | 9 +++++++--
drivers/usb/dwc3/gadget.c | 22 +++++++++-------------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 66a08b527165..f36bc933c55b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2388,6 +2388,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2406,7 +2407,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2441,7 +2444,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89a4dc8ebf94..630fd5f0ce97 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4776,26 +4776,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return -EAGAIN;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
--
2.49.0.604.gff1f9ca942-goog
When dwc3_gadget_soft_disconnect() fails, dwc3_suspend_common() keeps
going with the suspend, resulting in a period where the power domain is
off, but the gadget driver remains connected. Within this time frame,
invoking vbus_event_work() will cause an error as it attempts to access
DWC3 registers for endpoint disabling after the power domain has been
completely shut down.
Abort the suspend sequence when dwc3_gadget_suspend() cannot halt the
controller and proceeds with a soft connect.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
CC: stable(a)vger.kernel.org
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
---
Kernel panic - not syncing: Asynchronous SError Interrupt
Workqueue: events vbus_event_work
Call trace:
dump_backtrace+0xf4/0x118
show_stack+0x18/0x24
dump_stack_lvl+0x60/0x7c
dump_stack+0x18/0x3c
panic+0x16c/0x390
nmi_panic+0xa4/0xa8
arm64_serror_panic+0x6c/0x94
do_serror+0xc4/0xd0
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
readl+0x4c/0x8c
__dwc3_gadget_ep_disable+0x48/0x230
dwc3_gadget_ep_disable+0x50/0xc0
usb_ep_disable+0x44/0xe4
ffs_func_eps_disable+0x64/0xc8
ffs_func_set_alt+0x74/0x368
ffs_func_disable+0x18/0x28
composite_disconnect+0x90/0xec
configfs_composite_disconnect+0x64/0x88
usb_gadget_disconnect_locked+0xc0/0x168
vbus_event_work+0x3c/0x58
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20
---
Changelog:
v5:
- add the Acked-by tag
v4:
- correct the mistake where semicolon was forgotten
- return -EAGAIN upon dwc3_gadget_suspend() failure
v3:
- change the Fixes tag
v2:
- move declarations in separate lines
- add the Fixes tag
---
drivers/usb/dwc3/core.c | 9 +++++++--
drivers/usb/dwc3/gadget.c | 22 +++++++++-------------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 66a08b527165..f36bc933c55b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2388,6 +2388,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2406,7 +2407,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2441,7 +2444,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 321361288935..b6b63b530148 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return -EAGAIN;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
--
2.49.0.1164.gab81da1b16-goog
It's possible for the folio to either get marked for writeback or
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
folio_unmap_invalidate() call behind check for the folio being both
non-dirty and not under writeback AFTER the folio lock has been
acquired. Use this helper folio_end_dropbehind_write().
Cc: stable(a)vger.kernel.org
Reported-by: Al Viro <viro(a)zeniv.linux.org.uk>
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
mm/filemap.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 7b90cbeb4a1a..008a55290f34 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio)
}
EXPORT_SYMBOL(folio_wait_private_2_killable);
+static void filemap_end_dropbehind(struct folio *folio)
+{
+ struct address_space *mapping = folio->mapping;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+ if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+ folio_unmap_invalidate(mapping, folio, 0);
+}
+
/*
* If folio was marked as dropbehind, then pages should be dropped when writeback
* completes. Do that now. If we fail, it's likely because of a big folio -
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio)
* invalidation in that case.
*/
if (in_task() && folio_trylock(folio)) {
- if (folio->mapping)
- folio_unmap_invalidate(folio->mapping, folio, 0);
+ filemap_end_dropbehind(folio);
folio_unlock(folio);
}
}
--
2.49.0
In all the MUX value for LED1 GPIO46 there is a Copy-Paste error where
the MUX value is set to LED0_MODE_MASK instead of LED1_MODE_MASK.
This wasn't notice as there were no board that made use of the
secondary PHY LED but looking at the internal Documentation the actual
value should be LED1_MODE_MASK similar to the other GPIO entry.
Fix the wrong value to apply the correct MUX configuration.
Cc: stable(a)vger.kernel.org
Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC")
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/pinctrl/mediatek/pinctrl-airoha.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c
index b97b28ebb37a..8ef7f88477aa 100644
--- a/drivers/pinctrl/mediatek/pinctrl-airoha.c
+++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c
@@ -1752,8 +1752,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1816,8 +1816,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1880,8 +1880,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
@@ -1944,8 +1944,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = {
.regmap[0] = {
AIROHA_FUNC_MUX,
REG_GPIO_2ND_I2C_MODE,
- GPIO_LAN3_LED0_MODE_MASK,
- GPIO_LAN3_LED0_MODE_MASK
+ GPIO_LAN3_LED1_MODE_MASK,
+ GPIO_LAN3_LED1_MODE_MASK
},
.regmap[1] = {
AIROHA_FUNC_MUX,
--
2.48.1
Commit e77aff5528a18 ("binderfs: fix use-after-free in binder_devices")
addressed a use-after-free where devices could be released without first
being removed from the binder_devices list. However, there is a similar
path in binder_free_proc() that was missed:
==================================================================
BUG: KASAN: slab-use-after-free in binder_remove_device+0xd4/0x100
Write of size 8 at addr ffff0000c773b900 by task umount/467
CPU: 12 UID: 0 PID: 467 Comm: umount Not tainted 6.15.0-rc7-00138-g57483a362741 #9 PREEMPT
Hardware name: linux,dummy-virt (DT)
Call trace:
binder_remove_device+0xd4/0x100
binderfs_evict_inode+0x230/0x2f0
evict+0x25c/0x5dc
iput+0x304/0x480
dentry_unlink_inode+0x208/0x46c
__dentry_kill+0x154/0x530
[...]
Allocated by task 463:
__kmalloc_cache_noprof+0x13c/0x324
binderfs_binder_device_create.isra.0+0x138/0xa60
binder_ctl_ioctl+0x1ac/0x230
[...]
Freed by task 215:
kfree+0x184/0x31c
binder_proc_dec_tmpref+0x33c/0x4ac
binder_deferred_func+0xc10/0x1108
process_one_work+0x520/0xba4
[...]
==================================================================
Call binder_remove_device() within binder_free_proc() to ensure the
device is removed from the binder_devices list before being kfreed.
Cc: stable(a)vger.kernel.org
Fixes: 12d909cac1e1 ("binderfs: add new binder devices to binder_devices")
Reported-by: syzbot+4af454407ec393de51d6(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4af454407ec393de51d6
Tested-by: syzbot+4af454407ec393de51d6(a)syzkaller.appspotmail.com
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
drivers/android/binder.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 682bbe4ad550..c463ca4a8fff 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5241,6 +5241,7 @@ static void binder_free_proc(struct binder_proc *proc)
__func__, proc->outstanding_txns);
device = container_of(proc->context, struct binder_device, context);
if (refcount_dec_and_test(&device->ref)) {
+ binder_remove_device(device);
kfree(proc->context->name);
kfree(device);
}
--
2.49.0.1151.ga128411c76-goog
The patch titled
Subject: fs/dax: fix "don't skip locked entries when scanning entries"
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-dax-fix-dont-skip-locked-entries-when-scanning-entries.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Alistair Popple <apopple(a)nvidia.com>
Subject: fs/dax: fix "don't skip locked entries when scanning entries"
Date: Fri, 23 May 2025 14:37:49 +1000
Commit 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning
entries") introduced a new function, wait_entry_unlocked_exclusive(),
which waits for the current entry to become unlocked without advancing the
XArray iterator state.
Waiting for the entry to become unlocked requires dropping the XArray
lock. This requires calling xas_pause() prior to dropping the lock which
leaves the xas in a suitable state for the next iteration. However this
has the side-effect of advancing the xas state to the next index.
Normally this isn't an issue because xas_for_each() contains code to
detect this state and thus avoid advancing the index a second time on the
next loop iteration.
However both callers of and wait_entry_unlocked_exclusive() itself
subsequently use the xas state to reload the entry. As xas_pause()
updated the state to the next index this will cause the current entry
which is being waited on to be skipped. This caused the following warning
to fire intermittently when running xftest generic/068 on an XFS
filesystem with FS DAX enabled:
[ 35.067397] ------------[ cut here ]------------
[ 35.068229] WARNING: CPU: 21 PID: 1640 at mm/truncate.c:89 truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.069717] Modules linked in: nd_pmem dax_pmem nd_btt nd_e820 libnvdimm
[ 35.071006] CPU: 21 UID: 0 PID: 1640 Comm: fstest Not tainted 6.15.0-rc7+ #77 PREEMPT(voluntary)
[ 35.072613] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/204
[ 35.074845] RIP: 0010:truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.075962] Code: a1 00 00 00 f6 47 0d 20 0f 84 97 00 00 00 4c 63 e8 41 39 c4 7f 0b eb 61 49 83 c5 01 45 39 ec 7e 58 42 f68
[ 35.079522] RSP: 0018:ffffb04e426c7850 EFLAGS: 00010202
[ 35.080359] RAX: 0000000000000000 RBX: ffff9d21e3481908 RCX: ffffb04e426c77f4
[ 35.081477] RDX: ffffb04e426c79e8 RSI: ffffb04e426c79e0 RDI: ffff9d21e34816e8
[ 35.082590] RBP: ffffb04e426c79e0 R08: 0000000000000001 R09: 0000000000000003
[ 35.083733] R10: 0000000000000000 R11: 822b53c0f7a49868 R12: 000000000000001f
[ 35.084850] R13: 0000000000000000 R14: ffffb04e426c78e8 R15: fffffffffffffffe
[ 35.085953] FS: 00007f9134c87740(0000) GS:ffff9d22abba0000(0000) knlGS:0000000000000000
[ 35.087346] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 35.088244] CR2: 00007f9134c86000 CR3: 000000040afff000 CR4: 00000000000006f0
[ 35.089354] Call Trace:
[ 35.089749] <TASK>
[ 35.090168] truncate_inode_pages_range+0xfc/0x4d0
[ 35.091078] truncate_pagecache+0x47/0x60
[ 35.091735] xfs_setattr_size+0xc7/0x3e0
[ 35.092648] xfs_vn_setattr+0x1ea/0x270
[ 35.093437] notify_change+0x1f4/0x510
[ 35.094219] ? do_truncate+0x97/0xe0
[ 35.094879] do_truncate+0x97/0xe0
[ 35.095640] path_openat+0xabd/0xca0
[ 35.096278] do_filp_open+0xd7/0x190
[ 35.096860] do_sys_openat2+0x8a/0xe0
[ 35.097459] __x64_sys_openat+0x6d/0xa0
[ 35.098076] do_syscall_64+0xbb/0x1d0
[ 35.098647] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 35.099444] RIP: 0033:0x7f9134d81fc1
[ 35.100033] Code: 75 57 89 f0 25 00 00 41 00 3d 00 00 41 00 74 49 80 3d 2a 26 0e 00 00 74 6d 89 da 48 89 ee bf 9c ff ff ff5
[ 35.102993] RSP: 002b:00007ffcd41e0d10 EFLAGS: 00000202 ORIG_RAX: 0000000000000101
[ 35.104263] RAX: ffffffffffffffda RBX: 0000000000000242 RCX: 00007f9134d81fc1
[ 35.105452] RDX: 0000000000000242 RSI: 00007ffcd41e1200 RDI: 00000000ffffff9c
[ 35.106663] RBP: 00007ffcd41e1200 R08: 0000000000000000 R09: 0000000000000064
[ 35.107923] R10: 00000000000001a4 R11: 0000000000000202 R12: 0000000000000066
[ 35.109112] R13: 0000000000100000 R14: 0000000000100000 R15: 0000000000000400
[ 35.110357] </TASK>
[ 35.110769] irq event stamp: 8415587
[ 35.111486] hardirqs last enabled at (8415599): [<ffffffff8d74b562>] __up_console_sem+0x52/0x60
[ 35.113067] hardirqs last disabled at (8415610): [<ffffffff8d74b547>] __up_console_sem+0x37/0x60
[ 35.114575] softirqs last enabled at (8415300): [<ffffffff8d6ac625>] handle_softirqs+0x315/0x3f0
[ 35.115933] softirqs last disabled at (8415291): [<ffffffff8d6ac811>] __irq_exit_rcu+0xa1/0xc0
[ 35.117316] ---[ end trace 0000000000000000 ]---
Fix this by using xas_reset() instead, which is equivalent in
implementation to xas_pause() but does not advance the XArray state.
Link: https://lkml.kernel.org/r/20250523043749.1460780-1-apopple@nvidia.com
Fixes: 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning entries")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Alison Schofield <alison.schofield(a)intel.com>
Cc: Matthew Wilcow (Oracle) <willy(a)infradead.org>
Cc: Balbir Singh <balbirs(a)nvidia.com>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Ted Ts'o <tytso(a)mit.edu>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/dax.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/dax.c~fs-dax-fix-dont-skip-locked-entries-when-scanning-entries
+++ a/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusi
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xas_pause(xas);
+ xas_reset(xas);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
_
Patches currently in -mm which might be from apopple(a)nvidia.com are
fs-dax-fix-dont-skip-locked-entries-when-scanning-entries.patch
The patch titled
Subject: mm/khugepaged: fix race with folio split/free using temporary reference
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Shivank Garg <shivankg(a)amd.com>
Subject: mm/khugepaged: fix race with folio split/free using temporary reference
Date: Mon, 26 May 2025 18:28:18 +0000
hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn
calls folio_mapcount(). folio_mapcount() checks folio_test_large() before
proceeding to folio_large_mapcount(), but there is a race window where the
folio may get split/freed between these checks, triggering:
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio)
Take a temporary reference to the folio in hpage_collapse_scan_file().
This stabilizes the folio during refcount check and prevents incorrect
large folio detection due to concurrent split/free. Use helper
folio_expected_ref_count() + 1 to compare with folio_ref_count() instead
of using is_refcount_suitable().
Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com
Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value")
Signed-off-by: Shivank Garg <shivankg(a)amd.com>
Reported-by: syzbot+2b99589e33edbe9475ca(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com
Suggested-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Bharata B Rao <bharata(a)amd.com>
Cc: Fengwei Yin <fengwei.yin(a)intel.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mariano Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/mm/khugepaged.c~mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference
+++ a/mm/khugepaged.c
@@ -2295,6 +2295,17 @@ static int hpage_collapse_scan_file(stru
continue;
}
+ if (!folio_try_get(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ xas_reset(&xas);
+ continue;
+ }
+
if (folio_order(folio) == HPAGE_PMD_ORDER &&
folio->index == start) {
/* Maybe PMD-mapped */
@@ -2305,23 +2316,27 @@ static int hpage_collapse_scan_file(stru
* it's safe to skip LRU and refcount checks before
* returning.
*/
+ folio_put(folio);
break;
}
node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
+ folio_put(folio);
break;
}
cc->node_load[node]++;
if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
+ folio_put(folio);
break;
}
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
+ folio_put(folio);
break;
}
@@ -2333,6 +2348,7 @@ static int hpage_collapse_scan_file(stru
*/
present += folio_nr_pages(folio);
+ folio_put(folio);
if (need_resched()) {
xas_pause(&xas);
_
Patches currently in -mm which might be from shivankg(a)amd.com are
mm-khugepaged-fix-race-with-folio-split-free-using-temporary-reference.patch
mm-khugepaged-clean-up-refcount-check-using-folio_expected_ref_count.patch
blk_mq_freeze_queue() never terminates if one or more bios are on the plug
list and if the block device driver defines a .submit_bio() method.
This is the case for device mapper drivers. The deadlock happens because
blk_mq_freeze_queue() waits for q_usage_counter to drop to zero, because
a queue reference is held by bios on the plug list and because the
__bio_queue_enter() call in __submit_bio() waits for the queue to be
unfrozen.
This patch fixes the following deadlock:
Workqueue: dm-51_zwplugs blk_zone_wplug_bio_work
Call trace:
__schedule+0xb08/0x1160
schedule+0x48/0xc8
__bio_queue_enter+0xcc/0x1d0
__submit_bio+0x100/0x1b0
submit_bio_noacct_nocheck+0x230/0x49c
blk_zone_wplug_bio_work+0x168/0x250
process_one_work+0x26c/0x65c
worker_thread+0x33c/0x498
kthread+0x110/0x134
ret_from_fork+0x10/0x20
Call trace:
__switch_to+0x230/0x410
__schedule+0xb08/0x1160
schedule+0x48/0xc8
blk_mq_freeze_queue_wait+0x78/0xb8
blk_mq_freeze_queue+0x90/0xa4
queue_attr_store+0x7c/0xf0
sysfs_kf_write+0x98/0xc8
kernfs_fop_write_iter+0x12c/0x1d4
vfs_write+0x340/0x3ac
ksys_write+0x78/0xe8
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Damien Le Moal <dlemoal(a)kernel.org>
Cc: Yu Kuai <yukuai1(a)huaweicloud.com>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: dd291d77cc90 ("block: Introduce zone write plugging")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
Changes compared to v1: fixed a race condition. Call bio_zone_write_plugging()
only before submitting the bio and not after it has been submitted.
block/blk-core.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index b862c66018f2..713fb3865260 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -621,6 +621,13 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
+/*
+ * Do not call bio_queue_enter() if the BIO_ZONE_WRITE_PLUGGING flag has been
+ * set because this causes blk_mq_freeze_queue() to deadlock if
+ * blk_zone_wplug_bio_work() submits a bio. Calling bio_queue_enter() for bios
+ * on the plug list is not necessary since a q_usage_counter reference is held
+ * while a bio is on the plug list.
+ */
static void __submit_bio(struct bio *bio)
{
/* If plug is not used, add new plug here to cache nsecs time. */
@@ -633,8 +640,12 @@ static void __submit_bio(struct bio *bio)
if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
blk_mq_submit_bio(bio);
- } else if (likely(bio_queue_enter(bio) == 0)) {
+ } else {
struct gendisk *disk = bio->bi_bdev->bd_disk;
+ bool zwp = bio_zone_write_plugging(bio);
+
+ if (unlikely(!zwp && bio_queue_enter(bio) != 0))
+ goto finish_plug;
if ((bio->bi_opf & REQ_POLLED) &&
!(disk->queue->limits.features & BLK_FEAT_POLL)) {
@@ -643,9 +654,12 @@ static void __submit_bio(struct bio *bio)
} else {
disk->fops->submit_bio(bio);
}
- blk_queue_exit(disk->queue);
+
+ if (!zwp)
+ blk_queue_exit(disk->queue);
}
+finish_plug:
blk_finish_plug(&plug);
}
Customer is reporting a really subtle issue where we get random DMAR
faults, hangs and other nasties for kernel migration jobs when stressing
stuff like s2idle/s3/s4. The explosions seems to happen somewhere
after resuming the system with splats looking something like:
PM: suspend exit
rfkill: input handler disabled
xe 0000:00:02.0: [drm] GT0: Engine reset: engine_class=bcs, logical_mask: 0x2, guc_id=0
xe 0000:00:02.0: [drm] GT0: Timedout job: seqno=24496, lrc_seqno=24496, guc_id=0, flags=0x13 in no process [-1]
xe 0000:00:02.0: [drm] GT0: Kernel-submitted job timed out
The likely cause appears to be a race between suspend cancelling the
worker that processes the free_job()'s, such that we still have pending
jobs to be freed after the cancel. Following from this, on resume the
pending_list will now contain at least one already complete job, but it
looks like we call drm_sched_resubmit_jobs(), which will then call
run_job() on everything still on the pending_list. But if the job was
already complete, then all the resources tied to the job, like the bb
itself, any memory that is being accessed, the iommu mappings etc. might
be long gone since those are usually tied to the fence signalling.
This scenario can be seen in ftrace when running a slightly modified
xe_pm (kernel was only modified to inject artificial latency into
free_job to make the race easier to hit):
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=1, guc_state=0x0, flags=0x4
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=0, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 1:0x1, gt=1, width=1, guc_id=1, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=2, guc_state=0x0, flags=0x3
xe_exec_queue_resubmit: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
.....
xe_exec_queue_memory_cat_error: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x3, flags=0x13
So the job_run() is clearly triggered twice for the same job, even
though the first must have already signalled to completion during
suspend. We can also see a CAT error after the re-submit.
To prevent this try to call xe_sched_stop() to forcefully remove
anything on the pending_list that has already signalled, before we
re-submit.
v2:
- Make sure to re-arm the fence callbacks with sched_start().
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4856
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: William Tseng <william.tseng(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index c250ea773491..0c8fe0461df9 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -51,7 +51,9 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
{
+ drm_sched_stop(&sched->base, NULL); /* remove completed jobs */
drm_sched_resubmit_jobs(&sched->base);
+ drm_sched_start(&sched->base, 0); /* re-add fence callback for pending jobs */
}
static inline bool
--
2.49.0
Hi Greg and Sasha,
Please find attatched backports for 6.14 and 6.12 (which have -Wextra
enabled by default) to turn off a new warning from -Wextra in GCC 15 and
Clang 21, -Wunterminated-string-initialization, which is fatal when
CONFIG_WERROR is enabled. Please let me know if there are any issues or
questions.
Cheers,
Nathan
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
BugLink: https://bugs.launchpad.net/bugs/2111592
commit 8a7d12d674ac ("net: usb: usbnet: fix name regression") assumed
that local addresses always came from the kernel, but some devices hand
out local mac addresses so we ended up with point-to-point devices with
a mac set by the driver, renaming to eth%d when they used to be named
usb%d.
Userspace should not rely on device name, but for the sake of stability
restore the local mac address check portion of the naming exception:
point to point devices which either have no mac set by the driver or
have a local mac handed out by the driver will keep the usb%d name.
(some USB LTE modems are known to hand out a stable mac from the locally
administered range; that mac appears to be random (different for
mulitple devices) and can be reset with device-specific commands, so
while such devices would benefit from getting a OUI reserved, we have
to deal with these and might as well preserve the existing behavior
to avoid breaking fragile openwrt configurations and such on upgrade.)
Link: https://lkml.kernel.org/r/20241203130457.904325-1-asmadeus@codewreck.org
Fixes: 8a7d12d674ac ("net: usb: usbnet: fix name regression")
Cc: stable(a)vger.kernel.org
Tested-by: Ahmed Naseef <naseefkm(a)gmail.com>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Acked-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://patch.msgid.link/20250326-usbnet_rename-v2-1-57eb21fcff26@atmark-te…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
(cherry picked from commit 2ea396448f26d0d7d66224cb56500a6789c7ed07)
Signed-off-by: Jianlin Lv <iecedge(a)gmail.com>
---
drivers/net/usb/usbnet.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 9f66c47dc58b..08cbc8e4b361 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -178,6 +178,17 @@ int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress)
}
EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr);
+static bool usbnet_needs_usb_name_format(struct usbnet *dev, struct net_device *net)
+{
+ /* Point to point devices which don't have a real MAC address
+ * (or report a fake local one) have historically used the usb%d
+ * naming. Preserve this..
+ */
+ return (dev->driver_info->flags & FLAG_POINTTOPOINT) != 0 &&
+ (is_zero_ether_addr(net->dev_addr) ||
+ is_local_ether_addr(net->dev_addr));
+}
+
static void intr_complete (struct urb *urb)
{
struct usbnet *dev = urb->context;
@@ -1766,13 +1777,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
if (status < 0)
goto out1;
- // heuristic: "usb%d" for links we know are two-host,
- // else "eth%d" when there's reasonable doubt. userspace
- // can rename the link if it knows better.
+ /* heuristic: rename to "eth%d" if we are not sure this link
+ * is two-host (these links keep "usb%d")
+ */
if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
- ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
- /* somebody touched it*/
- !is_zero_ether_addr(net->dev_addr)))
+ !usbnet_needs_usb_name_format(dev, net))
strscpy(net->name, "eth%d", sizeof(net->name));
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
--
2.34.1
在 2025/5/23 07:31, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: properly limit inline data extent according to block size
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-properly-limit-inline-data-extent-according-to.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from all stable trees.
This is only for a debug feature, 2K block size, and it will never be
exposed to end users (only to allow people without a 64K page sized
system to test subpage routine on x86_64).
Thanks,
Qu
>
>
>
> commit a5afc96d757771c992eb3af4629a562ec52ba1dc
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Tue Feb 25 14:30:44 2025 +1030
>
> btrfs: properly limit inline data extent according to block size
>
> [ Upstream commit 23019d3e6617a8ec99a8d2f5947aa3dd8a74a1b8 ]
>
> Btrfs utilizes inline data extent for the following cases:
>
> - Regular small files
> - Symlinks
>
> And "btrfs check" detects any file extents that are too large as an
> error.
>
> It's not a problem for 4K block size, but for the incoming smaller
> block sizes (2K), it can cause problems due to bad limits:
>
> - Non-compressed inline data extents
> We do not allow a non-compressed inline data extent to be as large as
> block size.
>
> - Symlinks
> Currently the only real limit on symlinks are 4K, which can be larger
> than 2K block size.
>
> These will result btrfs-check to report too large file extents.
>
> Fix it by adding proper size checks for the above cases.
>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Reviewed-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 9ce1270addb04..0da2611fb9c85 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -623,6 +623,10 @@ static bool can_cow_file_range_inline(struct btrfs_inode *inode,
> if (size > fs_info->sectorsize)
> return false;
>
> + /* We do not allow a non-compressed extent to be as large as block size. */
> + if (data_len >= fs_info->sectorsize)
> + return false;
> +
> /* We cannot exceed the maximum inline data size. */
> if (data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> return false;
> @@ -8691,7 +8695,12 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
> struct extent_buffer *leaf;
>
> name_len = strlen(symname);
> - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> + /*
> + * Symlinks utilize uncompressed inline extent data, which should not
> + * reach block size.
> + */
> + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
> + name_len >= fs_info->sectorsize)
> return -ENAMETOOLONG;
>
> inode = new_inode(dir->i_sb);
On 22.05.25 23:06, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: zoned: exit btrfs_can_activate_zone if BTRFS_FS_NEED_ZONE_FINISH is set
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-zoned-exit-btrfs_can_activate_zone-if-btrfs_fs.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Hey Sasha,
this patch is just a readability cleanup, no reason to backport it.
Thanks,
Johannes
>
>
> commit 1136d333d91088ecf2d5189367540a84e60449a0
> Author: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
> Date: Wed Feb 12 15:05:00 2025 +0100
>
> btrfs: zoned: exit btrfs_can_activate_zone if BTRFS_FS_NEED_ZONE_FINISH is set
>
> [ Upstream commit 26b38e28162ef4ceb1e0482299820fbbd7dbcd92 ]
>
> If BTRFS_FS_NEED_ZONE_FINISH is already set for the whole filesystem, exit
> early in btrfs_can_activate_zone(). There's no need to check if
> BTRFS_FS_NEED_ZONE_FINISH needs to be set if it is already set.
>
> Reviewed-by: Naohiro Aota <naohiro.aota(a)wdc.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
> Reviewed-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index f39656668967c..4a3e02b49f295 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -2344,6 +2344,9 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
> if (!btrfs_is_zoned(fs_info))
> return true;
>
> + if (test_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags))
> + return false;
> +
> /* Check if there is a device with active zones left */
> mutex_lock(&fs_info->chunk_mutex);
> spin_lock(&fs_info->zone_active_bgs_lock);
>
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
commit 8a7d12d674ac ("net: usb: usbnet: fix name regression") assumed
that local addresses always came from the kernel, but some devices hand
out local mac addresses so we ended up with point-to-point devices with
a mac set by the driver, renaming to eth%d when they used to be named
usb%d.
Userspace should not rely on device name, but for the sake of stability
restore the local mac address check portion of the naming exception:
point to point devices which either have no mac set by the driver or
have a local mac handed out by the driver will keep the usb%d name.
(some USB LTE modems are known to hand out a stable mac from the locally
administered range; that mac appears to be random (different for
mulitple devices) and can be reset with device-specific commands, so
while such devices would benefit from getting a OUI reserved, we have
to deal with these and might as well preserve the existing behavior
to avoid breaking fragile openwrt configurations and such on upgrade.)
Link: https://lkml.kernel.org/r/20241203130457.904325-1-asmadeus@codewreck.org
Fixes: 8a7d12d674ac ("net: usb: usbnet: fix name regression")
Cc: stable(a)vger.kernel.org
Tested-by: Ahmed Naseef <naseefkm(a)gmail.com>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Acked-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://patch.msgid.link/20250326-usbnet_rename-v2-1-57eb21fcff26@atmark-te…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
(cherry picked from commit 2ea396448f26d0d7d66224cb56500a6789c7ed07)
Signed-off-by: Jianlin Lv <iecedge(a)gmail.com>
---
drivers/net/usb/usbnet.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 9f66c47dc58b..08cbc8e4b361 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -178,6 +178,17 @@ int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress)
}
EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr);
+static bool usbnet_needs_usb_name_format(struct usbnet *dev, struct net_device *net)
+{
+ /* Point to point devices which don't have a real MAC address
+ * (or report a fake local one) have historically used the usb%d
+ * naming. Preserve this..
+ */
+ return (dev->driver_info->flags & FLAG_POINTTOPOINT) != 0 &&
+ (is_zero_ether_addr(net->dev_addr) ||
+ is_local_ether_addr(net->dev_addr));
+}
+
static void intr_complete (struct urb *urb)
{
struct usbnet *dev = urb->context;
@@ -1766,13 +1777,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
if (status < 0)
goto out1;
- // heuristic: "usb%d" for links we know are two-host,
- // else "eth%d" when there's reasonable doubt. userspace
- // can rename the link if it knows better.
+ /* heuristic: rename to "eth%d" if we are not sure this link
+ * is two-host (these links keep "usb%d")
+ */
if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
- ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
- /* somebody touched it*/
- !is_zero_ether_addr(net->dev_addr)))
+ !usbnet_needs_usb_name_format(dev, net))
strscpy(net->name, "eth%d", sizeof(net->name));
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
--
2.34.1
Hi
Patch "remoteproc: qcom_wcnss: Handle platforms with only single power
domain" was added to stable queue for 6.14, 6.12, 6.6, 6.1 and 5.15 but
the patch has an issue which was fixed in upstream commit
4ca45af0a56d00b86285d6fdd720dca3215059a7
(https://lore.kernel.org/linux-arm-msm/20250511234026.94735-1-matti.lehtimak…).
Either the patch"remoteproc: qcom_wcnss: Handle platforms with only
single power domain" should not be included in stable releases or the
fix should be included as well.
Adding "remoteproc: qcom_wcnss: Handle platforms with only single power
domain" to stable releases is probably not really necessary anyway.
Thanks,
Matti
On Fri, May 23, 2025 at 7:17 AM Sasha Levin <sashal(a)kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback
>
> to the 6.1-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> bpf-prevent-unsafe-access-to-the-sock-fields-in-the-.patch
> and it can be found in the queue-6.1 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Hi,
I'm notified that this patch has been added into many branches, which
is against my expectations. The BPF timestaping feature was
implemented in 6.14 and the patch you are handling is just one of them.
The function of this patch prevents unexpected bpf programs using this
feature from triggering
fatal problems. So, IMHO, we don't need this patch in all the
older/stable branches:)
Thanks,
Jason
>
>
>
> commit 00b709040e0fdf5949dfbf02f38521e0b10943ac
> Author: Jason Xing <kerneljasonxing(a)gmail.com>
> Date: Thu Feb 20 15:29:31 2025 +0800
>
> bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback
>
> [ Upstream commit fd93eaffb3f977b23bc0a48d4c8616e654fcf133 ]
>
> The subsequent patch will implement BPF TX timestamping. It will
> call the sockops BPF program without holding the sock lock.
>
> This breaks the current assumption that all sock ops programs will
> hold the sock lock. The sock's fields of the uapi's bpf_sock_ops
> requires this assumption.
>
> To address this, a new "u8 is_locked_tcp_sock;" field is added. This
> patch sets it in the current sock_ops callbacks. The "is_fullsock"
> test is then replaced by the "is_locked_tcp_sock" test during
> sock_ops_convert_ctx_access().
>
> The new TX timestamping callbacks added in the subsequent patch will
> not have this set. This will prevent unsafe access from the new
> timestamping callbacks.
>
> Potentially, we could allow read-only access. However, this would
> require identifying which callback is read-safe-only and also requires
> additional BPF instruction rewrites in the covert_ctx. Since the BPF
> program can always read everything from a socket (e.g., by using
> bpf_core_cast), this patch keeps it simple and disables all read
> and write access to any socket fields through the bpf_sock_ops
> UAPI from the new TX timestamping callback.
>
> Moreover, note that some of the fields in bpf_sock_ops are specific
> to tcp_sock, and sock_ops currently only supports tcp_sock. In
> the future, UDP timestamping will be added, which will also break
> this assumption. The same idea used in this patch will be reused.
> Considering that the current sock_ops only supports tcp_sock, the
> variable is named is_locked_"tcp"_sock.
>
> Signed-off-by: Jason Xing <kerneljasonxing(a)gmail.com>
> Signed-off-by: Martin KaFai Lau <martin.lau(a)kernel.org>
> Link: https://patch.msgid.link/20250220072940.99994-4-kerneljasonxing@gmail.com
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index f3ef1a8965bb2..09cc8fb735f02 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1319,6 +1319,7 @@ struct bpf_sock_ops_kern {
> void *skb_data_end;
> u8 op;
> u8 is_fullsock;
> + u8 is_locked_tcp_sock;
> u8 remaining_opt_len;
> u64 temp; /* temp and everything after is not
> * initialized to 0 before calling
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 83e0362e3b721..63caa3181dfe6 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2409,6 +2409,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> if (sk_fullsock(sk)) {
> sock_ops.is_fullsock = 1;
> + sock_ops.is_locked_tcp_sock = 1;
> sock_owned_by_me(sk);
> }
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 497b41ac399da..5c9f3fcb957bb 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -10240,10 +10240,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> } \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + is_locked_tcp_sock), \
> fullsock_reg, si->src_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + is_locked_tcp_sock)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
> if (si->dst_reg == si->src_reg) \
> *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
> @@ -10328,10 +10328,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> temp)); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + is_locked_tcp_sock), \
> reg, si->dst_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + is_locked_tcp_sock)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, sk),\
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index db1a99df29d55..16f4a41a068e4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -168,6 +168,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
> sock_ops.is_fullsock = 1;
> + sock_ops.is_locked_tcp_sock = 1;
> sock_ops.sk = sk;
> bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
>
> @@ -184,6 +185,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = bpf_op;
> sock_ops.is_fullsock = 1;
> + sock_ops.is_locked_tcp_sock = 1;
> sock_ops.sk = sk;
> /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
> if (skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 40568365cdb3b..2f109f1968253 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -509,6 +509,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.is_locked_tcp_sock = 1;
> sock_ops.sk = sk;
> }
>
> @@ -554,6 +555,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.is_locked_tcp_sock = 1;
> sock_ops.sk = sk;
> }
>
Hello folks,
I'm pleased to announce the release of AUTOSEL, a complete rewrite of the
stable kernel patch selection tool that Julia Lawall and I presented back in
2018[1]. Unlike the previous version that relied on word statistics and older
neural network techniques, AUTOSEL leverages modern large language models and
embedding technology to provide significantly more accurate recommendations.
## What is AUTOSEL?
AUTOSEL automatically analyzes Linux kernel commits to determine whether they
should be backported to stable kernel trees. It examines commit messages, code
changes, and historical backporting patterns to make intelligent recommendations.
This is a complete rewrite of the original tool[1], with several major improvements:
1. Uses large language models (Claude, OpenAI, NVIDIA models) for semantic understanding
2. Implements embeddings-based similar commit retrieval for better context
3. Provides detailed explanations for each recommendation
4. Supports batch processing for efficient analysis of multiple commits
## Key Features
- Support for multiple LLM providers (Claude, OpenAI, NVIDIA)
- Self-contained embeddings using Candle
- Optional CUDA acceleration for faster analysis
- Detailed explanations of backporting decisions
- Extensive test coverage and validation
## Getting Started
```
git clone https://git.sr.ht/~sashal/autosel
cd autosel
cargo build --release
```
To analyze a specific commit:
```
./target/release/autosel --kernel-repo ~/linux --models claude --commit <SHA>
```
For more information, see the README.md file in the repository.
[1] https://lwn.net/Articles/764647/
--
Thanks,
Sasha
This reverts commit d6e020819612a4a06207af858e0978be4d3e3140.
The IS_DGFX check was put in place because error capture of buffer
objects is expected to be broken on devices with VRAM.
We seem to have already submitted the userspace fix to remove that
flag, so lets just rely on that for DG1.
Cc: stable(a)vger.kernel.org # v6.0+
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Andi Shyti <andi.shyti(a)linux.intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Tvrtko Ursulin <tursulin(a)ursulin.net>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7d44aadcd5a5..02c59808cbe4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2013,7 +2013,7 @@ static int eb_capture_stage(struct i915_execbuffer *eb)
continue;
if (i915_gem_context_is_recoverable(eb->gem_context) &&
- GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 10))
+ (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
return -EINVAL;
for_each_batch_create_order(eb, j) {
--
2.49.0
From: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Commit 3d05fc82237a ("Bluetooth: qca: set power_ctrl_enabled on NULL
returned by gpiod_get_optional()") accidentally changed the prevous
behavior where power control would be disabled without the BT_EN GPIO
only on QCA_WCN6750 and QCA_WCN6855 while also getting the error check
wrong. We should treat every IS_ERR() return value from
devm_gpiod_get_optional() as a reason to bail-out while we should only
set power_ctrl_enabled to false on the two models mentioned above. While
at it: use dev_err_probe() to save a LOC.
Cc: stable(a)vger.kernel.org
Fixes: 3d05fc82237a ("Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional()")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
---
drivers/bluetooth/hci_qca.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e00590ba24fdb..a2dc39c005f4f 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2415,14 +2415,14 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR(qcadev->bt_en) &&
- (data->soc_type == QCA_WCN6750 ||
- data->soc_type == QCA_WCN6855)) {
- dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
- return PTR_ERR(qcadev->bt_en);
- }
+ if (IS_ERR(qcadev->bt_en))
+ return dev_err_probe(&serdev->dev,
+ PTR_ERR(qcadev->bt_en),
+ "failed to acquire BT_EN gpio\n");
- if (!qcadev->bt_en)
+ if (!qcadev->bt_en &&
+ (data->soc_type == QCA_WCN6750 ||
+ data->soc_type == QCA_WCN6855))
power_ctrl_enabled = false;
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
--
2.48.1
The patch fixes a deadlock which can be triggered by an internal
syzkaller [1] reproducer and captured by bpftrace script [2] and its log
[3] in this scenario:
Process 1 Process 2
--- ---
hugetlb_fault
mutex_lock(B) // take B
filemap_lock_hugetlb_folio
filemap_lock_folio
__filemap_get_folio
folio_lock(A) // take A
hugetlb_wp
mutex_unlock(B) // release B
... hugetlb_fault
... mutex_lock(B) // take B
filemap_lock_hugetlb_folio
filemap_lock_folio
__filemap_get_folio
folio_lock(A) // blocked
unmap_ref_private
...
mutex_lock(B) // retake and blocked
This is a ABBA deadlock involving two locks:
- Lock A: pagecache_folio lock
- Lock B: hugetlb_fault_mutex_table lock
The deadlock occurs between two processes as follows:
1. The first process (let’s call it Process 1) is handling a
copy-on-write (COW) operation on a hugepage via hugetlb_wp. Due to
insufficient reserved hugetlb pages, Process 1, owner of the reserved
hugetlb page, attempts to unmap a hugepage owned by another process
(non-owner) to satisfy the reservation. Before unmapping, Process 1
acquires lock B (hugetlb_fault_mutex_table lock) and then lock A
(pagecache_folio lock). To proceed with the unmap, it releases Lock B
but retains Lock A. After the unmap, Process 1 tries to reacquire Lock
B. However, at this point, Lock B has already been acquired by another
process.
2. The second process (Process 2) enters the hugetlb_fault handler
during the unmap operation. It successfully acquires Lock B
(hugetlb_fault_mutex_table lock) that was just released by Process 1,
but then attempts to acquire Lock A (pagecache_folio lock), which is
still held by Process 1.
As a result, Process 1 (holding Lock A) is blocked waiting for Lock B
(held by Process 2), while Process 2 (holding Lock B) is blocked waiting
for Lock A (held by Process 1), constructing a ABBA deadlock scenario.
The solution here is to unlock the pagecache_folio and provide the
pagecache_folio_unlocked variable to the caller to have the visibility
over the pagecache_folio status for subsequent handling.
The error message:
INFO: task repro_20250402_:13229 blocked for more than 64 seconds.
Not tainted 6.15.0-rc3+ #24
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:repro_20250402_ state:D stack:25856 pid:13229 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00004006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
schedule_preempt_disabled+0x15/0x30
__mutex_lock+0x75f/0xeb0
hugetlb_wp+0xf88/0x3440
hugetlb_fault+0x14c8/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0x61d/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0010:__put_user_4+0xd/0x20
copy_process+0x1f4a/0x3d60
kernel_clone+0x210/0x8f0
__x64_sys_clone+0x18d/0x1f0
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x41b26d
</TASK>
INFO: task repro_20250402_:13229 is blocked on a mutex likely owned by task repro_20250402_:13250.
task:repro_20250402_ state:D stack:28288 pid:13250 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00000006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
io_schedule+0x92/0x110
folio_wait_bit_common+0x69a/0xba0
__filemap_get_folio+0x154/0xb70
hugetlb_fault+0xa50/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0xace/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0033:0x402619
</TASK>
INFO: task repro_20250402_:13250 blocked for more than 65 seconds.
Not tainted 6.15.0-rc3+ #24
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:repro_20250402_ state:D stack:28288 pid:13250 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00000006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
io_schedule+0x92/0x110
folio_wait_bit_common+0x69a/0xba0
__filemap_get_folio+0x154/0xb70
hugetlb_fault+0xa50/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0xace/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0033:0x402619
</TASK>
Showing all locks held in the system:
1 lock held by khungtaskd/35:
#0: ffffffff879a7440 (rcu_read_lock){....}-{1:3}, at: debug_show_all_locks+0x30/0x180
2 locks held by repro_20250402_/13229:
#0: ffff888017d801e0 (&mm->mmap_lock){++++}-{4:4}, at: lock_mm_and_find_vma+0x37/0x300
#1: ffff888000fec848 (&hugetlb_fault_mutex_table[i]){+.+.}-{4:4}, at: hugetlb_wp+0xf88/0x3440
3 locks held by repro_20250402_/13250:
#0: ffff8880177f3d08 (vm_lock){++++}-{0:0}, at: do_user_addr_fault+0x41b/0x1490
#1: ffff888000fec848 (&hugetlb_fault_mutex_table[i]){+.+.}-{4:4}, at: hugetlb_fault+0x3b8/0x2c30
#2: ffff8880129500e8 (&resv_map->rw_sema){++++}-{4:4}, at: hugetlb_fault+0x494/0x2c30
Link: https://drive.google.com/file/d/1DVRnIW-vSayU5J1re9Ct_br3jJQU6Vpb/view?usp=… [1]
Link: https://github.com/bboymimi/bpftracer/blob/master/scripts/hugetlb_lock_debu… [2]
Link: https://drive.google.com/file/d/1bWq2-8o-BJAuhoHWX7zAhI6ggfhVzQUI/view?usp=… [3]
Fixes: 40549ba8f8e0 ("hugetlb: use new vma_lock for pmd sharing synchronization")
Cc: <stable(a)vger.kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Gavin Shan <gshan(a)redhat.com>
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
---
mm/hugetlb.c | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e3e6ac991b9c..ad54a74aa563 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6115,7 +6115,8 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* Keep the pte_same checks anyway to make transition from the mutex easier.
*/
static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
- struct vm_fault *vmf)
+ struct vm_fault *vmf,
+ bool *pagecache_folio_unlocked)
{
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
@@ -6212,6 +6213,22 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
u32 hash;
folio_put(old_folio);
+ /*
+ * The pagecache_folio needs to be unlocked to avoid
+ * deadlock and we won't re-lock it in hugetlb_wp(). The
+ * pagecache_folio could be truncated after being
+ * unlocked. So its state should not be relied
+ * subsequently.
+ *
+ * Setting *pagecache_folio_unlocked to true allows the
+ * caller to handle any necessary logic related to the
+ * folio's unlocked state.
+ */
+ if (pagecache_folio) {
+ folio_unlock(pagecache_folio);
+ if (pagecache_folio_unlocked)
+ *pagecache_folio_unlocked = true;
+ }
/*
* Drop hugetlb_fault_mutex and vma_lock before
* unmapping. unmapping needs to hold vma_lock
@@ -6566,7 +6583,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(folio, vmf);
+ ret = hugetlb_wp(folio, vmf, NULL);
}
spin_unlock(vmf->ptl);
@@ -6638,6 +6655,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct hstate *h = hstate_vma(vma);
struct address_space *mapping;
int need_wait_lock = 0;
+ bool pagecache_folio_unlocked = false;
struct vm_fault vmf = {
.vma = vma,
.address = address & huge_page_mask(h),
@@ -6792,7 +6810,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
if (!huge_pte_write(vmf.orig_pte)) {
- ret = hugetlb_wp(pagecache_folio, &vmf);
+ ret = hugetlb_wp(pagecache_folio, &vmf,
+ &pagecache_folio_unlocked);
goto out_put_page;
} else if (likely(flags & FAULT_FLAG_WRITE)) {
vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
@@ -6809,10 +6828,14 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
out_ptl:
spin_unlock(vmf.ptl);
- if (pagecache_folio) {
+ /*
+ * If the pagecache_folio is unlocked in hugetlb_wp(), we skip
+ * folio_unlock() here.
+ */
+ if (pagecache_folio && !pagecache_folio_unlocked)
folio_unlock(pagecache_folio);
+ if (pagecache_folio)
folio_put(pagecache_folio);
- }
out_mutex:
hugetlb_vma_unlock_read(vma);
base-commit: d76bb1ebb5587f66b0f8b8099bfbb44722bc08b3
--
2.43.0
On Mon, May 26, 2025 at 02:40:33PM +0000, Parav Pandit wrote:
> When the PCI device is surprise-removed, requests may not complete on
> the device as the VQ is marked as broken. As a result, disk deletion
> hangs.
>
> Fix it by aborting the requests when the VQ is broken.
>
> With this fix now fio completes swiftly.
> An alternative of IO timeout has been considered, however
> when the driver knows about unresponsive block device, swiftly clearing
> them enables users and upper layers to react quickly.
>
> Verified with multiple device unplug iterations with pending requests in
> virtio used ring and some pending with the device.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci device")
> Cc: stable(a)vger.kernel.org
> Reported-by: Li RongQing <lirongqing(a)baidu.com>
> Closes: https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741@bai…
> Reviewed-by: Max Gurtovoy <mgurtovoy(a)nvidia.com>
> Reviewed-by: Israel Rukshin <israelr(a)nvidia.com>
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
Thanks!
I like the patch. Yes something to improve:
> ---
> v1->v2:
> - Addressed comments from Stephan
> - fixed spelling to 'waiting'
> - Addressed comments from Michael
> - Dropped checking broken vq from queue_rq() and queue_rqs()
> because it is checked in lower layer routines in virtio core
>
> v0->v1:
> - Fixed comments from Stefan to rename a cleanup function
> - Improved logic for handling any outstanding requests
> in bio layer
> - improved cancel callback to sync with ongoing done()
> ---
> drivers/block/virtio_blk.c | 83 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 83 insertions(+)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 7cffea01d868..12f31e6c00cb 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -1554,6 +1554,87 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +static bool virtblk_request_cancel(struct request *rq, void *data)
> +{
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> + struct virtio_blk *vblk = data;
> + struct virtio_blk_vq *vq;
> + unsigned long flags;
> +
> + vq = &vblk->vqs[rq->mq_hctx->queue_num];
> +
> + spin_lock_irqsave(&vq->lock, flags);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
My undertanding is that this is only safe to call when device
is not accessing the vq anymore? Right? otherwise device can
overwrite the status?
But I am not sure I understand what guarantees this.
Is there an assumption here that if vq is broken
and we are on remove path then device is already gone?
It seems to hold but
I'd prefer something that makes this guarantee at the API
level.
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + spin_unlock_irqrestore(&vq->lock, flags);
> + return true;
> +}
> +
> +static void virtblk_broken_device_cleanup(struct virtio_blk *vblk)
> +{
> + struct request_queue *q = vblk->disk->queue;
> +
> + if (!virtqueue_is_broken(vblk->vqs[0].vq))
> + return;
can marking vqs broken be in progress such that 0
is already broken but another one is not, yet?
if not pls add a comment explainging why.
> +
> + /* Start freezing the queue, so that new requests keeps waiting at the
> + * door of bio_queue_enter(). We cannot fully freeze the queue because
> + * freezed queue is an empty queue and there are pending requests, so
> + * only start freezing it.
> + */
> + blk_freeze_queue_start(q);
> +
> + /* When quiescing completes, all ongoing dispatches have completed
> + * and no new dispatch will happen towards the driver.
> + * This ensures that later when cancel is attempted, then are not
> + * getting processed by the queue_rq() or queue_rqs() handlers.
> + */
> + blk_mq_quiesce_queue(q);
> +
> + /*
> + * Synchronize with any ongoing VQ callbacks, effectively quiescing
> + * the device and preventing it from completing further requests
> + * to the block layer. Any outstanding, incomplete requests will be
> + * completed by virtblk_request_cancel().
I think what you really mean is:
finish processing in callbacks, that might have started before vqs were marked as
broken.
> + */
> + virtio_synchronize_cbs(vblk->vdev);
> +
> + /* At this point, no new requests can enter the queue_rq() and
> + * completion routine will not complete any new requests either for the
> + * broken vq. Hence, it is safe to cancel all requests which are
> + * started.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set, virtblk_request_cancel, vblk);
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /* All pending requests are cleaned up. Time to resume so that disk
> + * deletion can be smooth. Start the HW queues so that when queue is
> + * unquiesced requests can again enter the driver.
> + */
> + blk_mq_start_stopped_hw_queues(q, true);
> +
> + /* Unquiescing will trigger dispatching any pending requests to the
> + * driver which has crossed bio_queue_enter() to the driver.
> + */
> + blk_mq_unquiesce_queue(q);
> +
> + /* Wait for all pending dispatches to terminate which may have been
> + * initiated after unquiescing.
> + */
> + blk_mq_freeze_queue_wait(q);
> +
> + /* Mark the disk dead so that once queue unfreeze, the requests
> + * waiting at the door of bio_queue_enter() can be aborted right away.
> + */
> + blk_mark_disk_dead(vblk->disk);
> +
> + /* Unfreeze the queue so that any waiting requests will be aborted. */
> + blk_mq_unfreeze_queue_nomemrestore(q);
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev)
> {
> struct virtio_blk *vblk = vdev->priv;
> @@ -1561,6 +1642,8 @@ static void virtblk_remove(struct virtio_device *vdev)
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
>
> + virtblk_broken_device_cleanup(vblk);
> +
> del_gendisk(vblk->disk);
> blk_mq_free_tag_set(&vblk->tag_set);
>
> --
> 2.34.1
The function mod_hdcp_hdcp1_enable_encryption() calls the function
get_first_active_display(), but does not check its return value.
The return value is a null pointer if the display list is empty.
This will lead to a null pointer dereference in
mod_hdcp_hdcp2_enable_encryption().
Add a null pointer check for get_first_active_display() and return
MOD_HDCP_STATUS_DISPLAY_NOT_FOUND if the function return null.
Fixes: 2deade5ede56 ("drm/amd/display: Remove hdcp display state with mst fix")
Cc: stable(a)vger.kernel.org # v5.8
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index 8c137d7c032e..e58e7b93810b 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -368,6 +368,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
struct mod_hdcp_display *display = get_first_active_display(hdcp);
enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ if (!display)
+ return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
--
2.42.0.windows.2
Hi Paul,
Sorry for late replay, it takes some effort to change company policy of the proprietary.
For the questions:
1. What upstream tree did you intend it for and why?
- Linux mainline
We are developing openBMC with kernel-6.6.
For submitting patch to kernel-6.6 stable tree, it should exist in mainline first.
Reference: https://github.com/openbmc/linux/commits/dev-6.6/
2. Have you seen such cards in the wild? It wouldn't harm mentioning
specific examples in the commit message to probably help people
searching for problems specific to them later. You can also consider
adding Fixes: and Cc: stable tags if this bugfix solves a real issue
and should be backported to stable kernels.
- This NIC is developed by META terminus team and the problematic string is:
The channel Version Str : 24.12.08-000
I will update it to commit message later.
> -----Original Message-----
> From: Paul Fertser <fercerpav(a)gmail.com>
> Sent: Thursday, May 15, 2025 5:04 PM
> To: Jerry C Chen/WYHQ/Wiwynn <Jerry_C_Chen(a)wiwynn.com>
> Cc: patrick(a)stwcx.xyz; Samuel Mendoza-Jonas <sam(a)mendozajonas.com>;
> David S. Miller <davem(a)davemloft.net>; Eric Dumazet
> <edumazet(a)google.com>; Jakub Kicinski <kuba(a)kernel.org>; Paolo Abeni
> <pabeni(a)redhat.com>; Simon Horman <horms(a)kernel.org>;
> netdev(a)vger.kernel.org; linux-kernel(a)vger.kernel.org
> Subject: Re: [PATCH v1] net/ncsi: fix buffer overflow in getting version id
>
> [External Sender]
>
> Hello Jerry,
>
> This looks like an updated version of your previous patch[0] but you have
> forgotten to increase the number in the Subject. You have also forgotten to
> reply and take into account /some/ of the points I raised in the review.
>
> On Thu, May 15, 2025 at 04:34:47PM +0800, Jerry C Chen wrote:
> > In NC-SI spec v1.2 section 8.4.44.2, the firmware name doesn't need to
> > be null terminated while its size occupies the full size of the field.
> > Fix the buffer overflow issue by adding one additional byte for null
> > terminator.
> ...
>
> Please give an answer to every comment I made for your previous patch
> version and either make a corresponding change or explain why exactly you
> disagree.
>
> Also please stop sending any and all "proprietary or confidential information".
>
> [0]
> https://urldefense.com/v3/__https://patchwork.kernel.org/project/netdevbpf/p
> atch/20250227055044.3878374-1-Jerry_C_Chen(a)wiwynn.com/__;!!ObgLwW8
> oGsQ!nQ0Zkq6AxOKAJHbUUrTRnNI8fJNt7itufBwUXkkZU1-yfFo3h6Vm55K_mqr
> 5Ur5kw9wE9cMVgIdoGCL3u2DhhqA$
commit 732b87a409667a370b87955c518e5d004de740b5 upstream.
Determining the SST/MST mode during state computation must be done based
on the output type stored in the CRTC state, which in turn is set once
based on the modeset connector's SST vs. MST type and will not change as
long as the connector is using the CRTC. OTOH the MST mode indicated by
the given connector's intel_dp::is_mst flag can change independently of
the above output type, based on what sink is at any moment plugged to
the connector.
Fix the state computation accordingly.
Cc: Jani Nikula <jani.nikula(a)intel.com>
Fixes: f6971d7427c2 ("drm/i915/mst: adapt intel_dp_mtp_tu_compute_config() for 128b/132b SST")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4607
Reviewed-by: Jani Nikula <jani.nikula(a)intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
Link: https://lore.kernel.org/r/20250507151953.251846-1-imre.deak@intel.com
(cherry picked from commit 0f45696ddb2b901fbf15cb8d2e89767be481d59f)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
(cherry picked from commit 732b87a409667a370b87955c518e5d004de740b5)
References: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14218
[Rebased on v6.14.8 and added References link. (Imre)]
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 86d6185fda50a..8a6135b179d3b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -221,6 +221,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp,
to_intel_connector(conn_state->connector);
const struct drm_display_mode *adjusted_mode =
&crtc_state->hw.adjusted_mode;
+ bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST);
fixed20_12 pbn_div;
int bpp, slots = -EINVAL;
int dsc_slice_count = 0;
@@ -271,7 +272,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp,
link_bpp_x16,
&crtc_state->dp_m_n);
- if (intel_dp->is_mst) {
+ if (is_mst) {
int remote_bw_overhead;
int remote_tu;
fixed20_12 pbn;
--
2.44.2
From: Maud Spierings <maudspierings(a)gocontroll.com>
Throughout the various probe functions &indio_dev->dev is used before it
is initialized. This caused a kernel panic in st_sensors_power_enable()
when the call to devm_regulator_bulk_get_enable() fails and then calls
dev_err_probe() with the uninitialized device.
This seems to only cause a panic with dev_err_probe(), dev_err(),
dev_warn() and dev_info() don't seem to cause a panic, but are fixed
as well.
The issue is reported and traced here: [1]
[1]: https://lore.kernel.org/all/AM7P189MB100986A83D2F28AF3FFAF976E39EA@AM7P189M…
Cc: stable(a)vger.kernel.org
Signed-off-by: Maud Spierings <maudspierings(a)gocontroll.com>
---
When I search for general &indio_dev->dev usage, I see quite a lot more
hits, but I am not sure if there are issues with those too.
This issue has existed for a long time it seems and therefore it is
nearly impossible to find a proper fixes tag. I would love to see it at
least backported to 6.12 as that is where I encountered it, and I
believe the patch should apply without conflicts.
---
Changes in v3:
- Added the stable cc to the commit message
- Move the link to the original issue to the commit message
- Fix function notation in the commit message
- Move some more of the dev_*() calls to one line
- Link to v2: https://lore.kernel.org/r/20250522-st_iio_fix-v2-1-07a32655a996@gocontroll.…
Changes in v2:
- Added SoB in commit message
- Link to v1: https://lore.kernel.org/r/20250522-st_iio_fix-v1-1-d689b35f1612@gocontroll.…
---
drivers/iio/accel/st_accel_core.c | 10 +++---
drivers/iio/common/st_sensors/st_sensors_core.c | 36 ++++++++++------------
drivers/iio/common/st_sensors/st_sensors_trigger.c | 20 ++++++------
3 files changed, 31 insertions(+), 35 deletions(-)
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 99cb661fabb2d9cc1943fa8d0a6f3becb71126e6..a7961c610ed203d039bbf298c8883031a578fb0b 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
union acpi_object *ont;
union acpi_object *elements;
acpi_status status;
+ struct device *parent = indio_dev->dev.parent;
int ret = -EINVAL;
unsigned int val;
int i, j;
@@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
};
- adev = ACPI_COMPANION(indio_dev->dev.parent);
+ adev = ACPI_COMPANION(parent);
if (!adev)
return -ENXIO;
@@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
if (status == AE_NOT_FOUND) {
return -ENXIO;
} else if (ACPI_FAILURE(status)) {
- dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
- status);
+ dev_warn(parent, "failed to execute _ONT: %d\n", status);
return status;
}
@@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
}
ret = 0;
- dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+ dev_info(parent, "computed mount matrix from ACPI\n");
out:
kfree(buffer.pointer);
if (ret)
- dev_dbg(&indio_dev->dev,
+ dev_dbg(parent,
"failed to apply ACPI orientation data: %d\n", ret);
return ret;
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8ce1dccfea4f5aaff45d3d40f6542323dd1f0b09..dac593be56958fd0be92e13f628350fcfd0f040d 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
return err;
st_accel_set_fullscale_error:
- dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+ dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n");
return err;
}
@@ -231,8 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
ARRAY_SIZE(regulator_names),
regulator_names);
if (err)
- return dev_err_probe(&indio_dev->dev, err,
- "unable to enable supplies\n");
+ return dev_err_probe(parent, err, "unable to enable supplies\n");
return 0;
}
@@ -241,13 +240,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS");
static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
/* Sensor does not support interrupts */
if (!sdata->sensor_settings->drdy_irq.int1.addr &&
!sdata->sensor_settings->drdy_irq.int2.addr) {
if (pdata->drdy_int_pin)
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"DRDY on pin INT%d specified, but sensor does not support interrupts\n",
pdata->drdy_int_pin);
return 0;
@@ -256,29 +256,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
switch (pdata->drdy_int_pin) {
case 1:
if (!sdata->sensor_settings->drdy_irq.int1.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT1 not available.\n");
+ dev_err(parent, "DRDY on INT1 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 1;
break;
case 2:
if (!sdata->sensor_settings->drdy_irq.int2.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT2 not available.\n");
+ dev_err(parent, "DRDY on INT2 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 2;
break;
default:
- dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n");
+ dev_err(parent, "DRDY on pdata not valid.\n");
return -EINVAL;
}
if (pdata->open_drain) {
if (!sdata->sensor_settings->drdy_irq.int1.addr_od &&
!sdata->sensor_settings->drdy_irq.int2.addr_od)
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"open drain requested but unsupported.\n");
else
sdata->int_pin_open_drain = true;
@@ -336,6 +334,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS");
int st_sensors_init_sensor(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
struct st_sensors_platform_data *of_pdata;
int err = 0;
@@ -343,7 +342,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mutex_init(&sdata->odr_lock);
/* If OF/DT pdata exists, it will take precedence of anything else */
- of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ of_pdata = st_sensors_dev_probe(parent, pdata);
if (IS_ERR(of_pdata))
return PTR_ERR(of_pdata);
if (of_pdata)
@@ -370,7 +369,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
if (err < 0)
return err;
} else
- dev_info(&indio_dev->dev, "Full-scale not possible\n");
+ dev_info(parent, "Full-scale not possible\n");
err = st_sensors_set_odr(indio_dev, sdata->odr);
if (err < 0)
@@ -405,7 +404,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mask = sdata->sensor_settings->drdy_irq.int2.mask_od;
}
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"set interrupt line to open drain mode on pin %d\n",
sdata->drdy_int_pin);
err = st_sensors_write_data_with_mask(indio_dev, addr,
@@ -593,21 +592,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS");
int st_sensors_verify_id(struct iio_dev *indio_dev)
{
struct st_sensor_data *sdata = iio_priv(indio_dev);
+ struct device *parent = indio_dev->dev.parent;
int wai, err;
if (sdata->sensor_settings->wai_addr) {
err = regmap_read(sdata->regmap,
sdata->sensor_settings->wai_addr, &wai);
if (err < 0) {
- dev_err(&indio_dev->dev,
- "failed to read Who-Am-I register.\n");
- return err;
+ return dev_err_probe(parent, err,
+ "failed to read Who-Am-I register.\n");
}
if (sdata->sensor_settings->wai != wai) {
- dev_warn(&indio_dev->dev,
- "%s: WhoAmI mismatch (0x%x).\n",
- indio_dev->name, wai);
+ dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n",
+ indio_dev->name, wai);
}
}
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 9d4bf822a15dfcdd6c2835f6b9d7698cd3cb0b08..8a8ab688d7980f6dd43c660f90a0eba32c38388b 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
indio_dev->name);
if (sdata->trig == NULL) {
- dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+ dev_err(parent, "failed to allocate iio trigger.\n");
return -ENOMEM;
}
@@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
if (!sdata->sensor_settings->drdy_irq.addr_ihl) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n");
if (irq_trig == IRQF_TRIGGER_FALLING)
irq_trig = IRQF_TRIGGER_RISING;
@@ -156,21 +156,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->sensor_settings->drdy_irq.mask_ihl, 1);
if (err < 0)
return err;
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the falling edge or active low level\n");
}
break;
case IRQF_TRIGGER_RISING:
- dev_info(&indio_dev->dev,
- "interrupts on the rising edge\n");
+ dev_info(parent, "interrupts on the rising edge\n");
break;
case IRQF_TRIGGER_HIGH:
- dev_info(&indio_dev->dev,
- "interrupts active high level\n");
+ dev_info(parent, "interrupts active high level\n");
break;
default:
/* This is the most preferred mode, if possible */
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig);
irq_trig = IRQF_TRIGGER_RISING;
}
@@ -179,7 +177,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
if (irq_trig == IRQF_TRIGGER_FALLING ||
irq_trig == IRQF_TRIGGER_RISING) {
if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"edge IRQ not supported w/o stat register.\n");
return -EOPNOTSUPP;
}
@@ -214,13 +212,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig->name,
sdata->trig);
if (err) {
- dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
+ dev_err(parent, "failed to request trigger IRQ.\n");
return err;
}
err = devm_iio_trigger_register(parent, sdata->trig);
if (err < 0) {
- dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+ dev_err(parent, "failed to register iio trigger.\n");
return err;
}
indio_dev->trig = iio_trigger_get(sdata->trig);
---
base-commit: 7bac2c97af4078d7a627500c9bcdd5b033f97718
change-id: 20250522-st_iio_fix-1c58fdd4d420
Best regards,
--
Maud Spierings <maudspierings(a)gocontroll.com>
The function mlx5_query_nic_vport_qkey_viol_cntr() calls the function
mlx5_query_nic_vport_context() but does not check its return value. This
could lead to undefined behavior if the query fails. A proper
implementation can be found in mlx5_nic_vport_query_local_lb().
Add error handling for mlx5_query_nic_vport_context(). If it fails, free
the out buffer via kvfree() and return error code.
Fixes: 9efa75254593 ("net/mlx5_core: Introduce access functions to query vport RoCE fields")
Cc: stable(a)vger.kernel.org # v4.5
Target: net
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v3: Explicitly mention target branch. Change improper code.
v2: Remove redundant reassignment. Fix RCT.
drivers/net/ethernet/mellanox/mlx5/core/vport.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 66e44905c1f0..e4b86633d2fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -522,19 +522,22 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
{
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- mlx5_query_nic_vport_context(mdev, 0, out);
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (err)
+ goto out;
*qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
nic_vport_context.qkey_violation_counter);
-
+out:
kvfree(out);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
--
2.42.0.windows.2
The function max14577_reg_get_current_limit() calls the function
max14577_read_reg(), but does not check its return value. A proper
implementation can be found in max14577_get_online().
Add a error check for the max14577_read_reg() and return error code
if the function fails.
Fixes: b0902bbeb768 ("regulator: max14577: Add regulator driver for Maxim 14577")
Cc: stable(a)vger.kernel.org # v3.14
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/regulator/max14577-regulator.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c
index 5e7171b9065a..41fd15adfd1f 100644
--- a/drivers/regulator/max14577-regulator.c
+++ b/drivers/regulator/max14577-regulator.c
@@ -40,11 +40,14 @@ static int max14577_reg_get_current_limit(struct regulator_dev *rdev)
struct max14577 *max14577 = rdev_get_drvdata(rdev);
const struct maxim_charger_current *limits =
&maxim_charger_currents[max14577->dev_type];
+ int ret;
if (rdev_get_id(rdev) != MAX14577_CHARGER)
return -EINVAL;
- max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL4, ®_data);
+ ret = max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL4, ®_data);
+ if (ret < 0)
+ return ret;
if ((reg_data & CHGCTRL4_MBCICHWRCL_MASK) == 0)
return limits->min;
--
2.42.0.windows.2
KASAN reported out of bounds access - cs_dsp_ctl_cache_init_multiple_offsets().
The code uses mock_coeff_template.length_bytes (4 bytes) for register value
allocations. But later, this length is set to 8 bytes which causes
test code failures.
As fix, just remove the lenght override, keeping the original value 4
for all operations.
Cc: Simon Trimmer <simont(a)opensource.cirrus.com>
Cc: Charles Keepax <ckeepax(a)opensource.cirrus.com>
Cc: Richard Fitzgerald <rf(a)opensource.cirrus.com>
Cc: patches(a)opensource.cirrus.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Jaroslav Kysela <perex(a)perex.cz>
---
drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c b/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c
index 83386cc978e3..ebca3a4ab0f1 100644
--- a/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c
+++ b/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c
@@ -776,7 +776,6 @@ static void cs_dsp_ctl_cache_init_multiple_offsets(struct kunit *test)
"dummyalg", NULL);
/* Create controls identical except for offset */
- def.length_bytes = 8;
def.offset_dsp_words = 0;
def.shortname = "CtlA";
cs_dsp_mock_wmfw_add_coeff_desc(local->wmfw_builder, &def);
--
2.49.0
If the directory is corrupted and the number of nlinks is less than 2
(valid nlinks have at least 2), then when the directory is deleted, the
minix_rmdir will try to reduce the nlinks(unsigned int) to a negative
value.
Make nlinks validity check for directories.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrey Kriulin <kitotavrik.s(a)gmail.com>
---
v4: Add nlinks check for parent dirictory to minix_rmdir per Jan
Kara <jack(a)suse.cz> request.
v3: Move nlinks validaty check to minix_rmdir and minix_rename per Jan
Kara <jack(a)suse.cz> request.
v2: Move nlinks validaty check to V[12]_minix_iget() per Jan Kara
<jack(a)suse.cz> request. Change return error code to EUCLEAN. Don't block
directory in r/o mode per Al Viro <viro(a)zeniv.linux.org.uk> request.
fs/minix/namei.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 8938536d8d3c..ab86fd16e548 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -161,8 +161,12 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry)
static int minix_rmdir(struct inode * dir, struct dentry *dentry)
{
struct inode * inode = d_inode(dentry);
- int err = -ENOTEMPTY;
+ int err = -EUCLEAN;
+ if (inode->i_nlink < 2 || dir->i_nlink <= 2)
+ return err;
+
+ err = -ENOTEMPTY;
if (minix_empty_dir(inode)) {
err = minix_unlink(dir, dentry);
if (!err) {
@@ -235,6 +239,10 @@ static int minix_rename(struct mnt_idmap *idmap,
mark_inode_dirty(old_inode);
if (dir_de) {
+ if (old_dir->i_nlink <= 2) {
+ err = -EUCLEAN;
+ goto out_dir;
+ }
err = minix_set_link(dir_de, dir_folio, new_dir);
if (!err)
inode_dec_link_count(old_dir);
--
2.47.2
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052601-directive-strep-6b85@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
inline data handling has a race between writing and writing to a memory
map.
When ext4_page_mkwrite is called, it calls ext4_convert_inline_data, which
destroys the inline data, but if block allocation fails, restores the
inline data. In that process, we could have:
CPU1 CPU2
destroy_inline_data
write_begin (does not see inline data)
restory_inline_data
write_end (sees inline data)
This leads to bugs like the one below, as write_begin did not prepare for
the case of inline data, which is expected by the write_end side of it.
------------[ cut here ]------------
kernel BUG at fs/ext4/inline.c:235!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI
CPU: 1 UID: 0 PID: 5838 Comm: syz-executor110 Not tainted 6.13.0-rc3-syzkaller-00209-g499551201b5f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
RIP: 0010:ext4_write_inline_data fs/ext4/inline.c:235 [inline]
RIP: 0010:ext4_write_inline_data_end+0xdc7/0xdd0 fs/ext4/inline.c:774
Code: 47 1d 8c e8 4b 3a 91 ff 90 0f 0b e8 63 7a 47 ff 48 8b 7c 24 10 48 c7 c6 e0 47 1d 8c e8 32 3a 91 ff 90 0f 0b e8 4a 7a 47 ff 90 <0f> 0b 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90
RSP: 0018:ffffc900031c7320 EFLAGS: 00010293
RAX: ffffffff8257f9a6 RBX: 000000000000005a RCX: ffff888012968000
RDX: 0000000000000000 RSI: 000000000000005a RDI: 000000000000005b
RBP: ffffc900031c7448 R08: ffffffff8257ef87 R09: 1ffff11006806070
R10: dffffc0000000000 R11: ffffed1006806071 R12: 000000000000005a
R13: dffffc0000000000 R14: ffff888076b65bd8 R15: 000000000000005b
FS: 00007f5c6bacf6c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000a00 CR3: 0000000073fb6000 CR4: 0000000000350ef0
Call Trace:
<TASK>
generic_perform_write+0x6f8/0x990 mm/filemap.c:4070
ext4_buffered_write_iter+0xc5/0x350 fs/ext4/file.c:299
ext4_file_write_iter+0x892/0x1c50
iter_file_splice_write+0xbfc/0x1510 fs/splice.c:743
do_splice_from fs/splice.c:941 [inline]
direct_splice_actor+0x11d/0x220 fs/splice.c:1164
splice_direct_to_actor+0x588/0xc80 fs/splice.c:1108
do_splice_direct_actor fs/splice.c:1207 [inline]
do_splice_direct+0x289/0x3e0 fs/splice.c:1233
do_sendfile+0x564/0x8a0 fs/read_write.c:1363
__do_sys_sendfile64 fs/read_write.c:1424 [inline]
__se_sys_sendfile64+0x17c/0x1e0 fs/read_write.c:1410
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f5c6bb18d09
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f5c6bacf218 EFLAGS: 00000246 ORIG_RAX: 0000000000000028
RAX: ffffffffffffffda RBX: 00007f5c6bba0708 RCX: 00007f5c6bb18d09
RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000004
RBP: 00007f5c6bba0700 R08: 0000000000000000 R09: 0000000000000000
R10: 000080001d00c0d0 R11: 0000000000000246 R12: 00007f5c6bb6d620
R13: 00007f5c6bb6d0c0 R14: 0031656c69662f2e R15: 8088e3ad122bc192
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
This happens because ext4_page_mkwrite is not protected by the inode_lock.
The xattr semaphore is not sufficient to protect inline data handling in a
sane way, so we need to rely on the inode_lock. Adding the inode_lock to
ext4_page_mkwrite is not an option, otherwise lock-ordering problems with
mmap_lock may arise.
The conversion inside ext4_page_mkwrite was introduced at commit
7b4cc9787fe3 ("ext4: evict inline data when writing to memory map"). This
fixes a documented bug in the commit message, which suggests some
alternatives fixes.
The alternative of keeping the data inline is left as an exercise for the
reader.
Instead, block allocation still happens, just as it does right now. But
removal of the inline data is only done when pages are written back.
Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map")
Reported-by: syzbot+0c89d865531d053abb2d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=0c89d865531d053abb2d
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>
Cc: stable(a)vger.kernel.org
---
fs/ext4/inode.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 94c7d2d828a64e42ded09c82497ed7617071aa19..38653d5c8b32ede2b130285ab13ef1e96b1ba783 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2620,8 +2620,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
ret = PTR_ERR(handle);
goto out_writepages;
}
- BUG_ON(ext4_test_inode_state(inode,
- EXT4_STATE_MAY_INLINE_DATA));
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
ext4_destroy_inline_data(handle, inode);
ext4_journal_stop(handle);
}
@@ -6222,10 +6221,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
filemap_invalidate_lock_shared(mapping);
- err = ext4_convert_inline_data(inode);
- if (err)
- goto out_ret;
-
/*
* On data journalling we skip straight to the transaction handle:
* there's no delalloc; page truncated will be checked later; the
---
base-commit: a5806cd506af5a7c19bcd596e4708b5c464bfd21
change-id: 20250519-ext4_inline_page_mkwrite-c42ca1f02295
Best regards,
--
Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>
Hello,
with gcc 15.1.1 i see the following build issue on 6.6.91:
CC arch/x86/boot/a20.o
In file included from ./include/uapi/linux/posix_types.h:5,
from ./include/uapi/linux/types.h:14,
from ./include/linux/types.h:6,
from arch/x86/boot/boot.h:22,
from arch/x86/boot/a20.c:14:
./include/linux/stddef.h:11:9: error: cannot use keyword 'false' as
enumeration constant
11 | false = 0,
| ^~~~~
./include/linux/stddef.h:11:9: note: 'false' is a keyword with
'-std=c23' onwards
./include/linux/types.h:35:33: error: 'bool' cannot be defined via 'typedef'
35 | typedef _Bool bool;
| ^~~~
./include/linux/types.h:35:33: note: 'bool' is a keyword with '-std=c23'
onwards
./include/linux/types.h:35:1: warning: useless type name in empty
declaration
35 | typedef _Bool bool;
| ^~~~~~~
make[2]: *** [scripts/Makefile.build:243: arch/x86/boot/a20.o] Error 1
make[1]: *** [arch/x86/Makefile:284: bzImage] Error 2
Fixed by b3bee1e7c3f2b1b77182302c7b2131c804175870 (x86/boot: Compile
boot code with -std=gnu11 too), which hasn't been backported yet.
Should probably go into all stable trees.
Regards,
Hendrik
This series introduces a new metadata format for UVC cameras and adds a
couple of improvements to the UVC metadata handling.
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
---
Changes in v5:
- Fix codestyle and kerneldoc warnings reported by media-ci
- Link to v4: https://lore.kernel.org/r/20250403-uvc-meta-v4-0-877aa6475975@chromium.org
Changes in v4:
- Rename format to V4L2_META_FMT_UVC_MSXU_1_5 (Thanks Mauro)
- Flag the new format with a quirk.
- Autodetect MSXU devices.
- Link to v3: https://lore.kernel.org/linux-media/20250313-uvc-metadata-v3-0-c467af869c60…
Changes in v3:
- Fix doc syntax errors.
- Link to v2: https://lore.kernel.org/r/20250306-uvc-metadata-v2-0-7e939857cad5@chromium.…
Changes in v2:
- Add metadata invalid fix
- Move doc note to a separate patch
- Introuce V4L2_META_FMT_UVC_CUSTOM (thanks HdG!).
- Link to v1: https://lore.kernel.org/r/20250226-uvc-metadata-v1-1-6cd6fe5ec2cb@chromium.…
---
Ricardo Ribalda (4):
media: uvcvideo: Do not mark valid metadata as invalid
media: Documentation: Add note about UVCH length field
media: uvcvideo: Introduce V4L2_META_FMT_UVC_MSXU_1_5
media: uvcvideo: Auto-set UVC_QUIRK_MSXU_META
.../userspace-api/media/v4l/meta-formats.rst | 1 +
.../media/v4l/metafmt-uvc-msxu-1-5.rst | 23 +++++
.../userspace-api/media/v4l/metafmt-uvc.rst | 4 +-
MAINTAINERS | 1 +
drivers/media/usb/uvc/uvc_metadata.c | 97 ++++++++++++++++++++--
drivers/media/usb/uvc/uvc_video.c | 12 +--
drivers/media/usb/uvc/uvcvideo.h | 1 +
drivers/media/v4l2-core/v4l2-ioctl.c | 1 +
include/linux/usb/uvc.h | 3 +
include/uapi/linux/videodev2.h | 1 +
10 files changed, 131 insertions(+), 13 deletions(-)
---
base-commit: 4e82c87058f45e79eeaa4d5bcc3b38dd3dce7209
change-id: 20250403-uvc-meta-e556773d12ae
Best regards,
--
Ricardo Ribalda <ribalda(a)chromium.org>
On Wed, May 21, 2025 at 06:37:41AM +0000, Parav Pandit wrote:
> When the PCI device is surprise removed, requests may not complete
> the device as the VQ is marked as broken. Due to this, the disk
> deletion hangs.
>
> Fix it by aborting the requests when the VQ is broken.
>
> With this fix now fio completes swiftly.
> An alternative of IO timeout has been considered, however
> when the driver knows about unresponsive block device, swiftly clearing
> them enables users and upper layers to react quickly.
>
> Verified with multiple device unplug iterations with pending requests in
> virtio used ring and some pending with the device.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci device")
> Cc: stable(a)vger.kernel.org
> Reported-by: lirongqing(a)baidu.com
> Closes: https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741@bai…
> Reviewed-by: Max Gurtovoy <mgurtovoy(a)nvidia.com>
> Reviewed-by: Israel Rukshin <israelr(a)nvidia.com>
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
> ---
> changelog:
> v0->v1:
> - Fixed comments from Stefan to rename a cleanup function
> - Improved logic for handling any outstanding requests
> in bio layer
> - improved cancel callback to sync with ongoing done()
>
> ---
> drivers/block/virtio_blk.c | 95 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 95 insertions(+)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 7cffea01d868..5212afdbd3c7 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -435,6 +435,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> blk_status_t status;
> int err;
>
> + /* Immediately fail all incoming requests if the vq is broken.
> + * Once the queue is unquiesced, upper block layer flushes any pending
> + * queued requests; fail them right away.
> + */
> + if (unlikely(virtqueue_is_broken(vblk->vqs[qid].vq)))
> + return BLK_STS_IOERR;
> +
> status = virtblk_prep_rq(hctx, vblk, req, vbr);
> if (unlikely(status))
> return status;
> @@ -508,6 +515,11 @@ static void virtio_queue_rqs(struct rq_list *rqlist)
> while ((req = rq_list_pop(rqlist))) {
> struct virtio_blk_vq *this_vq = get_virtio_blk_vq(req->mq_hctx);
>
> + if (unlikely(virtqueue_is_broken(this_vq->vq))) {
> + rq_list_add_tail(&requeue_list, req);
> + continue;
> + }
> +
> if (vq && vq != this_vq)
> virtblk_add_req_batch(vq, &submit_list);
> vq = this_vq;
> @@ -1554,6 +1566,87 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +static bool virtblk_request_cancel(struct request *rq, void *data)
> +{
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> + struct virtio_blk *vblk = data;
> + struct virtio_blk_vq *vq;
> + unsigned long flags;
> +
> + vq = &vblk->vqs[rq->mq_hctx->queue_num];
> +
> + spin_lock_irqsave(&vq->lock, flags);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + spin_unlock_irqrestore(&vq->lock, flags);
> + return true;
> +}
> +
> +static void virtblk_broken_device_cleanup(struct virtio_blk *vblk)
> +{
> + struct request_queue *q = vblk->disk->queue;
> +
> + if (!virtqueue_is_broken(vblk->vqs[0].vq))
> + return;
Can a subset of virtqueues be broken? If so, then this code doesn't
handle it.
> +
> + /* Start freezing the queue, so that new requests keeps waitng at the
s/waitng/waiting/
> + * door of bio_queue_enter(). We cannot fully freeze the queue because
> + * freezed queue is an empty queue and there are pending requests, so
> + * only start freezing it.
> + */
> + blk_freeze_queue_start(q);
> +
> + /* When quiescing completes, all ongoing dispatches have completed
> + * and no new dispatch will happen towards the driver.
> + * This ensures that later when cancel is attempted, then are not
> + * getting processed by the queue_rq() or queue_rqs() handlers.
> + */
> + blk_mq_quiesce_queue(q);
> +
> + /*
> + * Synchronize with any ongoing VQ callbacks, effectively quiescing
> + * the device and preventing it from completing further requests
> + * to the block layer. Any outstanding, incomplete requests will be
> + * completed by virtblk_request_cancel().
> + */
> + virtio_synchronize_cbs(vblk->vdev);
> +
> + /* At this point, no new requests can enter the queue_rq() and
> + * completion routine will not complete any new requests either for the
> + * broken vq. Hence, it is safe to cancel all requests which are
> + * started.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set, virtblk_request_cancel, vblk);
Although virtio_synchronize_cbs() was called, a broken/malicious device
can still raise IRQs. Would that lead to use-after-free or similar
undefined behavior for requests that have been submitted to the device?
It seems safer to reset the device before marking the requests as
failed.
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /* All pending requests are cleaned up. Time to resume so that disk
> + * deletion can be smooth. Start the HW queues so that when queue is
> + * unquiesced requests can again enter the driver.
> + */
> + blk_mq_start_stopped_hw_queues(q, true);
> +
> + /* Unquiescing will trigger dispatching any pending requests to the
> + * driver which has crossed bio_queue_enter() to the driver.
> + */
> + blk_mq_unquiesce_queue(q);
> +
> + /* Wait for all pending dispatches to terminate which may have been
> + * initiated after unquiescing.
> + */
> + blk_mq_freeze_queue_wait(q);
> +
> + /* Mark the disk dead so that once queue unfreeze, the requests
> + * waiting at the door of bio_queue_enter() can be aborted right away.
> + */
> + blk_mark_disk_dead(vblk->disk);
> +
> + /* Unfreeze the queue so that any waiting requests will be aborted. */
> + blk_mq_unfreeze_queue_nomemrestore(q);
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev)
> {
> struct virtio_blk *vblk = vdev->priv;
> @@ -1561,6 +1654,8 @@ static void virtblk_remove(struct virtio_device *vdev)
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
>
> + virtblk_broken_device_cleanup(vblk);
> +
> del_gendisk(vblk->disk);
> blk_mq_free_tag_set(&vblk->tag_set);
>
> --
> 2.34.1
>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052659-snugly-tag-6f43@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052654-retired-handling-209f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052652-bloated-preoccupy-3ffb@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052651-capably-displease-37db@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052649-cymbal-eggplant-f524@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052647-jokester-paralyses-8a1a@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 55e6796b24d0..6a3cf7935c14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 7db9da609c84..0d4948b720e2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c88..a468d4c29c0c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
The function ieee80211_chsw_switch_vifs() calls the function
ieee80211_link_get_chanctx(), but does not check its return value.
The return value is a null pointer if the ieee80211_link_get_chanctx()
fails. This will lead to a null pointer dereference in the following
code "&old_ctx->conf". A proper implementation can be found in
ieee80211_link_use_reserved_assign().
Add a null pointer check and goto error handling path if the
function fails.
Fixes: 5d52ee811019 ("mac80211: allow reservation of a running chanctx")
Cc: stable(a)vger.kernel.org # v3.16
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v2: Fix code error.
net/mac80211/chan.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index a442cb667520..c9b703c283e7 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1503,6 +1503,10 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
continue;
old_ctx = ieee80211_link_get_chanctx(link);
+ if (WARN_ON(!old_ctx)) {
+ err = -EINVAL;
+ goto out;
+ }
vif_chsw[i].vif = &link->sdata->vif;
vif_chsw[i].old_ctx = &old_ctx->conf;
vif_chsw[i].new_ctx = &ctx->conf;
--
2.42.0.windows.2
The function sunxi_nfc_hw_ecc_write_chunk() calls the
sunxi_nfc_hw_ecc_write_chunk(), but does not call the configuration
function sunxi_nfc_randomizer_config(). Consequently, the randomization
might not conduct correctly, which will affect the lifespan of NAND flash.
A proper implementation can be found in sunxi_nfc_hw_ecc_write_page_dma().
Add the sunxi_nfc_randomizer_config() to config randomizer.
Fixes: 4be4e03efc7f ("mtd: nand: sunxi: add randomizer support")
Cc: stable(a)vger.kernel.org # v4.6
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/mtd/nand/raw/sunxi_nand.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 9179719f639e..162cd5f4f234 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1050,6 +1050,7 @@ static int sunxi_nfc_hw_ecc_write_chunk(struct nand_chip *nand,
if (ret)
return ret;
+ sunxi_nfc_randomizer_config(nand, page, false);
sunxi_nfc_randomizer_enable(nand);
sunxi_nfc_hw_ecc_set_prot_oob_bytes(nand, oob, 0, bbm, page);
--
2.42.0.windows.2
The function ieee80211_chsw_switch_vifs() calls the function
ieee80211_link_get_chanctx(), but does not check its return value.
The return value is a null pointer if the ieee80211_link_get_chanctx()
fails. This will lead to a null pointer dereference in the following
code "&old_ctx->conf". A proper implementation can be found in
ieee80211_link_use_reserved_assign().
Add a null pointer check and goto error handling path if the
function fails.
Fixes: 5d52ee811019 ("mac80211: allow reservation of a running chanctx")
Cc: stable(a)vger.kernel.org # v3.16
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
net/mac80211/chan.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index a442cb667520..9e6235001e0a 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1503,6 +1503,10 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
continue;
old_ctx = ieee80211_link_get_chanctx(link);
+ if (WARN_ON(old_ctx)) {
+ err = -EINVAL;
+ goto out;
+ }
vif_chsw[i].vif = &link->sdata->vif;
vif_chsw[i].old_ctx = &old_ctx->conf;
vif_chsw[i].new_ctx = &ctx->conf;
--
2.42.0.windows.2
Hi,
We (Ateme, a video encoding company) may have found an unwanted behavior in the scheduler since 5.10 (commit 16b0a7a1a0af), then 5.16 (commit c5b0a7eefc70), then 5.19 (commit not found yet), then maybe some other commits from 5.19 to 6.12, with a consequence of IPC decrease. Problem still appears on lasts 6.12, 6.13 and 6.14
We have reverted both 16b0a7a1a0af and c5b0a7eefc70 commits that reduce our performances (see fair.patch attached, applicable on 6.12.17). Performances increase but still doesnt reach our reference on 5.4.152.
Instead of trying to find every single commits from 5.18 to 6.12 that could decrease our performance, I chosed to bench 5.4.152 versus 6.12.17 with and without fair.patch.
The problem appeared clear : a lot of CPU migrations go out of CCX, then L3 miss, then IPC decrease.
Context of our bench: video decoder which work at a regulated speed, 1 process, 21 main threads, everyone of them creates 10 threads, 8 of them have a fine granularity, meaning they go to sleep quite often, giving the scheduler a lot of opportunities to act).
Hardware is an AMD Epyc 7702P, 128 cores, grouped by shared LLC 4 cores +4 hyperthreaded cores. NUMA topology is set by the BIOS to 1 node per socket.
Every pthread are created with default attributes.
I use AMDuProf (-C -A system -a -m ipc,l1,l2,l3,memory) for CPU Utilization (%), CPU effective freq, IPC, L2 access (pti), L2 miss (pti), L3 miss (absolute) and Mem (GB/s, and perf (stat -d -d -d -a) for Context switches, CPU migrations and Real time (s).
We noted that upgrade 5.4.152 to 6.12.17 without any special preempt configuration :
Two fold increase in CPU migration
30% memory bandwidth increase
20% L3 cache misses increase
10% IPC decrease
With the attached fair.patch applied to 6.12.17 (reminder : this patch reverts one commit appeared in 5.10 and another in 5.16) we managed to reduce CPU migrations and increase IPC but not as much as we had on 5.4.152. Our goal is to keep kernel "clean" without any patch (we don't want to apply and maintain fair.patch) then for the rest of my email we will consider stock kernel 6.12.17.
I've reduced the "sub threads count" to stays below 128 threads. Then still 21 main threads and instead of 10 worker per main thread I set 5 workers (4 of them with fine granularity) giving 105 pthreads -> everything goes fine in 6.12.17, no extra CPU migration, no extra memory bandwidth...
But as soon as we increase worker threads count (10 instead of 5) the problem appears.
We know our decoder may have too many threads but that's out of our scope, it has been designed like that some years ago and moving from "lot of small threads to few of big thread" is for now not possible.
We have a work around : we group threads using pthread affinities. Every main thread (and by inheritance of affinities every worker threads) on a single CCX so we reduce the L3 miss for them, then decrease memory bandwidth, then finally increasing IPC.
With that solution, we go above our original performances, for both kernels, and they perform at the same level. However, it is impractical to productize as such.
I've tried many kernel build configurations (CONFIG_PREMPT_*, CONFIG_SCHEDULER_*, tuning of fair.c:sysctl_sched_migration_cost) on 6.12.17, 6.12.21 (longterm), 6.13.9 (mainline), and 6.14.0 Nothing changes.
Q: Is there anyway to tune the kernel so we can get our performance back without using the pthread affinities work around ?
Feel free to ask an archive containing binaries and payload.
I first posted on https://bugzilla.kernel.org/show_bug.cgi?id=220000 but one told me the best way to get answers where these mailing lists
Regards,
Jean-Baptiste Roquefere, Ateme
Attached bench.tar.gz :
* bench/fair.patch
* bench/bench.ods with 2 sheets :
- regulated : decoder speed is regulated to keep real time constant
- no regul : decoder speed is not regulated and uses from 1 to 76 main threads with 10 worker per main thread
* bench/regulated.csv : bench.ods:regulated exported in csv format
* bench/not-regulated : bench.ods:no regul exported in csv format
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x 08f959759e1e6e9c4b898c51a7d387ac3480630b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052516-factsheet-coagulant-3fb0@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08f959759e1e6e9c4b898c51a7d387ac3480630b Mon Sep 17 00:00:00 2001
From: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Date: Wed, 23 Apr 2025 09:53:32 +0200
Subject: [PATCH] mmc: sdhci-of-dwcmshc: add PD workaround on RK3576
RK3576's power domains have a peculiar design where the PD_NVM power
domain, of which the sdhci controller is a part, seemingly does not have
idempotent runtime disable/enable. The end effect is that if PD_NVM gets
turned off by the generic power domain logic because all the devices
depending on it are suspended, then the next time the sdhci device is
unsuspended, it'll hang the SoC as soon as it tries accessing the CQHCI
registers.
RK3576's UFS support needed a new dev_pm_genpd_rpm_always_on function
added to the generic power domains API to handle what appears to be a
similar hardware design.
Use this new function to ask for the same treatment in the sdhci
controller by giving rk3576 its own platform data with its own postinit
function. The benefit of doing this instead of marking the power domains
always on in the power domain core is that we only do this if we know
the platform we're running on actually uses the sdhci controller. For
others, keeping PD_NVM always on would be a waste, as they won't run
into this specific issue. The only other IP in PD_NVM that could be
affected is FSPI0. If it gets a mainline driver, it will probably want
to do the same thing.
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Reviewed-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Fixes: cfee1b507758 ("pmdomain: rockchip: Add support for RK3576 SoC")
Cc: <stable(a)vger.kernel.org> # v6.15+
Link: https://lore.kernel.org/r/20250423-rk3576-emmc-fix-v3-1-0bf80e29967f@collab…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 09b9ab15e499..a20d03fdd6a9 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv
}
}
+static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+{
+ struct device *dev = mmc_dev(host->mmc);
+ int ret;
+
+ /*
+ * This works around the design of the RK3576's power domains, which
+ * makes the PD_NVM power domain, which the sdhci controller on the
+ * RK3576 is in, never come back the same way once it's run-time
+ * suspended once. This can happen during early kernel boot if no driver
+ * is using either PD_NVM or its child power domain PD_SDGMAC for a
+ * short moment, leading to it being turned off to save power. By
+ * keeping it on, sdhci suspending won't lead to PD_NVM becoming a
+ * candidate for getting turned off.
+ */
+ ret = dev_pm_genpd_rpm_always_on(dev, true);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n",
+ ERR_PTR(ret));
+
+ dwcmshc_rk35xx_postinit(host, dwc_priv);
+}
+
static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
.postinit = dwcmshc_rk35xx_postinit,
};
+static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = {
+ .pdata = {
+ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ },
+ .init = dwcmshc_rk35xx_init,
+ .postinit = dwcmshc_rk3576_postinit,
+};
+
static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = {
.pdata = {
.ops = &sdhci_dwcmshc_th1520_ops,
@@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
.compatible = "rockchip,rk3588-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
+ {
+ .compatible = "rockchip,rk3576-dwcmshc",
+ .data = &sdhci_dwcmshc_rk3576_pdata,
+ },
{
.compatible = "rockchip,rk3568-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 08f959759e1e6e9c4b898c51a7d387ac3480630b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052515-headdress-overcome-c741@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08f959759e1e6e9c4b898c51a7d387ac3480630b Mon Sep 17 00:00:00 2001
From: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Date: Wed, 23 Apr 2025 09:53:32 +0200
Subject: [PATCH] mmc: sdhci-of-dwcmshc: add PD workaround on RK3576
RK3576's power domains have a peculiar design where the PD_NVM power
domain, of which the sdhci controller is a part, seemingly does not have
idempotent runtime disable/enable. The end effect is that if PD_NVM gets
turned off by the generic power domain logic because all the devices
depending on it are suspended, then the next time the sdhci device is
unsuspended, it'll hang the SoC as soon as it tries accessing the CQHCI
registers.
RK3576's UFS support needed a new dev_pm_genpd_rpm_always_on function
added to the generic power domains API to handle what appears to be a
similar hardware design.
Use this new function to ask for the same treatment in the sdhci
controller by giving rk3576 its own platform data with its own postinit
function. The benefit of doing this instead of marking the power domains
always on in the power domain core is that we only do this if we know
the platform we're running on actually uses the sdhci controller. For
others, keeping PD_NVM always on would be a waste, as they won't run
into this specific issue. The only other IP in PD_NVM that could be
affected is FSPI0. If it gets a mainline driver, it will probably want
to do the same thing.
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli(a)collabora.com>
Reviewed-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Fixes: cfee1b507758 ("pmdomain: rockchip: Add support for RK3576 SoC")
Cc: <stable(a)vger.kernel.org> # v6.15+
Link: https://lore.kernel.org/r/20250423-rk3576-emmc-fix-v3-1-0bf80e29967f@collab…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 09b9ab15e499..a20d03fdd6a9 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv
}
}
+static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+{
+ struct device *dev = mmc_dev(host->mmc);
+ int ret;
+
+ /*
+ * This works around the design of the RK3576's power domains, which
+ * makes the PD_NVM power domain, which the sdhci controller on the
+ * RK3576 is in, never come back the same way once it's run-time
+ * suspended once. This can happen during early kernel boot if no driver
+ * is using either PD_NVM or its child power domain PD_SDGMAC for a
+ * short moment, leading to it being turned off to save power. By
+ * keeping it on, sdhci suspending won't lead to PD_NVM becoming a
+ * candidate for getting turned off.
+ */
+ ret = dev_pm_genpd_rpm_always_on(dev, true);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n",
+ ERR_PTR(ret));
+
+ dwcmshc_rk35xx_postinit(host, dwc_priv);
+}
+
static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
.postinit = dwcmshc_rk35xx_postinit,
};
+static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = {
+ .pdata = {
+ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ },
+ .init = dwcmshc_rk35xx_init,
+ .postinit = dwcmshc_rk3576_postinit,
+};
+
static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = {
.pdata = {
.ops = &sdhci_dwcmshc_th1520_ops,
@@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
.compatible = "rockchip,rk3588-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
},
+ {
+ .compatible = "rockchip,rk3576-dwcmshc",
+ .data = &sdhci_dwcmshc_rk3576_pdata,
+ },
{
.compatible = "rockchip,rk3568-dwcmshc",
.data = &sdhci_dwcmshc_rk35xx_pdata,
The quilt patch titled
Subject: mm: fix copy_vma() error handling for hugetlb mappings
has been removed from the -mm tree. Its filename was
mm-fix-copy_vma-error-handling-for-hugetlb-mappings.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Subject: mm: fix copy_vma() error handling for hugetlb mappings
Date: Fri, 23 May 2025 14:19:10 +0200
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 16 +++++++++++++++-
mm/mremap.c | 3 +--
mm/vma.c | 1 +
4 files changed, 22 insertions(+), 3 deletions(-)
--- a/include/linux/hugetlb.h~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(s
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
--- a/mm/hugetlb.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_a
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
--- a/mm/mremap.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
--- a/mm/vma.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct v
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
_
Patches currently in -mm which might be from rcn(a)igalia.com are
The quilt patch titled
Subject: memcg: always call cond_resched() after fn()
has been removed from the -mm tree. Its filename was
memcg-always-call-cond_resched-after-fn.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Breno Leitao <leitao(a)debian.org>
Subject: memcg: always call cond_resched() after fn()
Date: Fri, 23 May 2025 10:21:06 -0700
I am seeing soft lockup on certain machine types when a cgroup OOMs. This
is happening because killing the process in certain machine might be very
slow, which causes the soft lockup and RCU stalls. This happens usually
when the cgroup has MANY processes and memory.oom.group is set.
Example I am seeing in real production:
[462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) ....
....
[462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) ....
[462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982]
....
Quick look at why this is so slow, it seems to be related to serial flush
for certain machine types. For all the crashes I saw, the target CPU was
at console_flush_all().
In the case above, there are thousands of processes in the cgroup, and it
is soft locking up before it reaches the 1024 limit in the code (which
would call the cond_resched()). So, cond_resched() in 1024 blocks is not
sufficient.
Remove the counter-based conditional rescheduling logic and call
cond_resched() unconditionally after each task iteration, after fn() is
called. This avoids the lockup independently of how slow fn() is.
Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org
Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process")
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Suggested-by: Rik van Riel <riel(a)surriel.com>
Acked-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Michael van der Westhuizen <rmikey(a)meta.com>
Cc: Usama Arif <usamaarif642(a)gmail.com>
Cc: Pavel Begunkov <asml.silence(a)gmail.com>
Cc: Chen Ridong <chenridong(a)huawei.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/mm/memcontrol.c~memcg-always-call-cond_resched-after-fn
+++ a/mm/memcontrol.c
@@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cg
{
struct mem_cgroup *iter;
int ret = 0;
- int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cg
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
while (!ret && (task = css_task_iter_next(&it))) {
- /* Avoid potential softlockup warning */
- if ((++i & 1023) == 0)
- cond_resched();
ret = fn(task, arg);
+ /* Avoid potential softlockup warning */
+ cond_resched();
}
css_task_iter_end(&it);
if (ret) {
_
Patches currently in -mm which might be from leitao(a)debian.org are
The quilt patch titled
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
has been removed from the -mm tree. Its filename was
mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
Date: Thu, 22 May 2025 11:22:17 +0800
A kernel crash was observed when replacing free hugetlb folios:
BUG: kernel NULL pointer dereference, address: 0000000000000028
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary)
RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0
RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000
RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000
RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000
R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000
R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004
FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0
Call Trace:
<TASK>
replace_free_hugepage_folios+0xb6/0x100
alloc_contig_range_noprof+0x18a/0x590
? srso_return_thunk+0x5/0x5f
? down_read+0x12/0xa0
? srso_return_thunk+0x5/0x5f
cma_range_alloc.constprop.0+0x131/0x290
__cma_alloc+0xcf/0x2c0
cma_alloc_write+0x43/0xb0
simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110
debugfs_attr_write+0x46/0x70
full_proxy_write+0x62/0xa0
vfs_write+0xf8/0x420
? srso_return_thunk+0x5/0x5f
? filp_flush+0x86/0xa0
? srso_return_thunk+0x5/0x5f
? filp_close+0x1f/0x30
? srso_return_thunk+0x5/0x5f
? do_dup2+0xaf/0x160
? srso_return_thunk+0x5/0x5f
ksys_write+0x65/0xe0
do_syscall_64+0x64/0x170
entry_SYSCALL_64_after_hwframe+0x76/0x7e
There is a potential race between __update_and_free_hugetlb_folio() and
replace_free_hugepage_folios():
CPU1 CPU2
__update_and_free_hugetlb_folio replace_free_hugepage_folios
folio_test_hugetlb(folio)
-- It's still hugetlb folio.
__folio_clear_hugetlb(folio)
hugetlb_free_folio(folio)
h = folio_hstate(folio)
-- Here, h is NULL pointer
When the above race condition occurs, folio_hstate(folio) returns NULL,
and subsequent access to this NULL pointer will cause the system to crash.
To resolve this issue, execute folio_hstate(folio) under the protection
of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not
return NULL.
Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.…
Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/mm/hugetlb.c~mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios
+++ a/mm/hugetlb.c
@@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigne
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
+
+ /*
+ * The folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
+ spin_lock_irq(&hugetlb_lock);
if (folio_test_hugetlb(folio)) {
h = folio_hstate(folio);
} else {
+ spin_unlock_irq(&hugetlb_lock);
start_pfn++;
continue;
}
+ spin_unlock_irq(&hugetlb_lock);
if (!folio_ref_count(folio)) {
ret = alloc_and_dissolve_hugetlb_folio(h, folio,
_
Patches currently in -mm which might be from yangge1116(a)126.com are
The quilt patch titled
Subject: mm: vmalloc: only zero-init on vrealloc shrink
has been removed from the -mm tree. Its filename was
mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <kees(a)kernel.org>
Subject: mm: vmalloc: only zero-init on vrealloc shrink
Date: Thu, 15 May 2025 14:42:16 -0700
The common case is to grow reallocations, and since init_on_alloc will
have already zeroed the whole allocation, we only need to zero when
shrinking the allocation.
Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org
Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing")
Signed-off-by: Kees Cook <kees(a)kernel.org>
Tested-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: Eduard Zingerman <eddyz87(a)gmail.com>
Cc: "Erhard F." <erhard_f(a)mailbox.org>
Cc: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
--- a/mm/vmalloc.c~mm-vmalloc-only-zero-init-on-vrealloc-shrink
+++ a/mm/vmalloc.c
@@ -4093,8 +4093,8 @@ void *vrealloc_noprof(const void *p, siz
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
- /* Zero out "freed" memory. */
- if (want_init_on_free())
+ /* Zero out "freed" memory, potentially for future realloc. */
+ if (want_init_on_free() || want_init_on_alloc(flags))
memset((void *)p + size, 0, old_size - size);
vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
@@ -4107,9 +4107,11 @@ void *vrealloc_noprof(const void *p, siz
if (size <= alloced_size) {
kasan_unpoison_vmalloc(p + old_size, size - old_size,
KASAN_VMALLOC_PROT_NORMAL);
- /* Zero out "alloced" memory. */
- if (want_init_on_alloc(flags))
- memset((void *)p + old_size, 0, size - old_size);
+ /*
+ * No need to zero memory here, as unused memory will have
+ * already been zeroed at initial allocation time or during
+ * realloc shrink time.
+ */
vm->requested_size = size;
return (void *)p;
}
_
Patches currently in -mm which might be from kees(a)kernel.org are
The quilt patch titled
Subject: mm: vmalloc: actually use the in-place vrealloc region
has been removed from the -mm tree. Its filename was
mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <kees(a)kernel.org>
Subject: mm: vmalloc: actually use the in-place vrealloc region
Date: Thu, 15 May 2025 14:42:15 -0700
Patch series "mm: vmalloc: Actually use the in-place vrealloc region".
This fixes a performance regression[1] with vrealloc()[1].
The refactoring to not build a new vmalloc region only actually worked
when shrinking. Actually return the resized area when it grows. Ugh.
Link: https://lkml.kernel.org/r/20250515214217.619685-1-kees@kernel.org
Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing")
Signed-off-by: Kees Cook <kees(a)kernel.org>
Reported-by: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
Closes: https://lore.kernel.org/all/20250515-bpf-verifier-slowdown-vwo2meju4cgp2su5… [1]
Tested-by: Eduard Zingerman <eddyz87(a)gmail.com>
Tested-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
Tested-by: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Reviewed-by: Danilo Krummrich <dakr(a)kernel.org>
Cc: "Erhard F." <erhard_f(a)mailbox.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/vmalloc.c~mm-vmalloc-actually-use-the-in-place-vrealloc-region
+++ a/mm/vmalloc.c
@@ -4111,6 +4111,7 @@ void *vrealloc_noprof(const void *p, siz
if (want_init_on_alloc(flags))
memset((void *)p + old_size, 0, size - old_size);
vm->requested_size = size;
+ return (void *)p;
}
/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
_
Patches currently in -mm which might be from kees(a)kernel.org are
The quilt patch titled
Subject: module: release codetag section when module load fails
has been removed from the -mm tree. Its filename was
module-release-codetag-section-when-module-load-fails.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: David Wang <00107082(a)163.com>
Subject: module: release codetag section when module load fails
Date: Tue, 20 May 2025 00:38:23 +0800
When module load fails after memory for codetag section is ready, codetag
section memory will not be properly released. This causes memory leak,
and if next module load happens to get the same module address, codetag
may pick the uninitialized section when manipulating tags during module
unload, and leads to "unable to handle page fault" BUG.
Link: https://lkml.kernel.org/r/20250519163823.7540-1-00107082@163.com
Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory")
Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/
Signed-off-by: David Wang <00107082(a)163.com>
Acked-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Petr Pavlu <petr.pavlu(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/module/main.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/module/main.c~module-release-codetag-section-when-module-load-fails
+++ a/kernel/module/main.c
@@ -2829,6 +2829,7 @@ static void module_deallocate(struct mod
{
percpu_modfree(mod);
module_arch_freeing_init(mod);
+ codetag_free_module_sections(mod);
free_mod_mem(mod);
}
_
Patches currently in -mm which might be from 00107082(a)163.com are
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b3f6fcd8404f9f92262303369bb877ec5d188a81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052454-kinship-reiterate-9e47@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b3f6fcd8404f9f92262303369bb877ec5d188a81 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tdave(a)nvidia.com>
Date: Mon, 19 May 2025 18:19:37 -0700
Subject: [PATCH] iommu: Skip PASID validation for devices without PASID
capability
Generally PASID support requires ACS settings that usually create
single device groups, but there are some niche cases where we can get
multi-device groups and still have working PASID support. The primary
issue is that PCI switches are not required to treat PASID tagged TLPs
specially so appropriate ACS settings are required to route all TLPs to
the host bridge if PASID is going to work properly.
pci_enable_pasid() does check that each device that will use PASID has
the proper ACS settings to achieve this routing.
However, no-PASID devices can be combined with PASID capable devices
within the same topology using non-uniform ACS settings. In this case
the no-PASID devices may not have strict route to host ACS flags and
end up being grouped with the PASID devices.
This configuration fails to allow use of the PASID within the iommu
core code which wrongly checks if the no-PASID device supports PASID.
Fix this by ignoring no-PASID devices during the PASID validation. They
will never issue a PASID TLP anyhow so they can be ignored.
Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tushar Dave <tdave(a)nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Link: https://lore.kernel.org/r/20250520011937.3230557-1-tdave@nvidia.com
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..9d728800a862 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, old);
- if (ret)
- goto err_revert;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
return 0;
@@ -3379,15 +3381,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- /*
- * If no old domain, undo the succeeded devices/pasid.
- * Otherwise, rollback the succeeded devices/pasid to the old
- * domain. And it is a driver bug to fail attaching with a
- * previously good domain.
- */
- if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If no old domain, undo the succeeded devices/pasid.
+ * Otherwise, rollback the succeeded devices/pasid to
+ * the old domain. And it is a driver bug to fail
+ * attaching with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
pasid, domain)))
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
return ret;
}
@@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
{
struct group_device *device;
- for_each_group_device(group, device)
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ for_each_group_device(group, device) {
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
/*
@@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x b3f6fcd8404f9f92262303369bb877ec5d188a81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052453-elastic-creature-b8c3@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b3f6fcd8404f9f92262303369bb877ec5d188a81 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tdave(a)nvidia.com>
Date: Mon, 19 May 2025 18:19:37 -0700
Subject: [PATCH] iommu: Skip PASID validation for devices without PASID
capability
Generally PASID support requires ACS settings that usually create
single device groups, but there are some niche cases where we can get
multi-device groups and still have working PASID support. The primary
issue is that PCI switches are not required to treat PASID tagged TLPs
specially so appropriate ACS settings are required to route all TLPs to
the host bridge if PASID is going to work properly.
pci_enable_pasid() does check that each device that will use PASID has
the proper ACS settings to achieve this routing.
However, no-PASID devices can be combined with PASID capable devices
within the same topology using non-uniform ACS settings. In this case
the no-PASID devices may not have strict route to host ACS flags and
end up being grouped with the PASID devices.
This configuration fails to allow use of the PASID within the iommu
core code which wrongly checks if the no-PASID device supports PASID.
Fix this by ignoring no-PASID devices during the PASID validation. They
will never issue a PASID TLP anyhow so they can be ignored.
Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tushar Dave <tdave(a)nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Link: https://lore.kernel.org/r/20250520011937.3230557-1-tdave@nvidia.com
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..9d728800a862 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, old);
- if (ret)
- goto err_revert;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
return 0;
@@ -3379,15 +3381,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- /*
- * If no old domain, undo the succeeded devices/pasid.
- * Otherwise, rollback the succeeded devices/pasid to the old
- * domain. And it is a driver bug to fail attaching with a
- * previously good domain.
- */
- if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If no old domain, undo the succeeded devices/pasid.
+ * Otherwise, rollback the succeeded devices/pasid to
+ * the old domain. And it is a driver bug to fail
+ * attaching with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
pasid, domain)))
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
return ret;
}
@@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
{
struct group_device *device;
- for_each_group_device(group, device)
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ for_each_group_device(group, device) {
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
/*
@@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x b3f6fcd8404f9f92262303369bb877ec5d188a81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052452-sputter-outmost-ef4f@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b3f6fcd8404f9f92262303369bb877ec5d188a81 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tdave(a)nvidia.com>
Date: Mon, 19 May 2025 18:19:37 -0700
Subject: [PATCH] iommu: Skip PASID validation for devices without PASID
capability
Generally PASID support requires ACS settings that usually create
single device groups, but there are some niche cases where we can get
multi-device groups and still have working PASID support. The primary
issue is that PCI switches are not required to treat PASID tagged TLPs
specially so appropriate ACS settings are required to route all TLPs to
the host bridge if PASID is going to work properly.
pci_enable_pasid() does check that each device that will use PASID has
the proper ACS settings to achieve this routing.
However, no-PASID devices can be combined with PASID capable devices
within the same topology using non-uniform ACS settings. In this case
the no-PASID devices may not have strict route to host ACS flags and
end up being grouped with the PASID devices.
This configuration fails to allow use of the PASID within the iommu
core code which wrongly checks if the no-PASID device supports PASID.
Fix this by ignoring no-PASID devices during the PASID validation. They
will never issue a PASID TLP anyhow so they can be ignored.
Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tushar Dave <tdave(a)nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Link: https://lore.kernel.org/r/20250520011937.3230557-1-tdave@nvidia.com
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..9d728800a862 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, old);
- if (ret)
- goto err_revert;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
return 0;
@@ -3379,15 +3381,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- /*
- * If no old domain, undo the succeeded devices/pasid.
- * Otherwise, rollback the succeeded devices/pasid to the old
- * domain. And it is a driver bug to fail attaching with a
- * previously good domain.
- */
- if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If no old domain, undo the succeeded devices/pasid.
+ * Otherwise, rollback the succeeded devices/pasid to
+ * the old domain. And it is a driver bug to fail
+ * attaching with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
pasid, domain)))
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
return ret;
}
@@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
{
struct group_device *device;
- for_each_group_device(group, device)
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ for_each_group_device(group, device) {
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
/*
@@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b04f0d89e880bc2cca6a5c73cf287082c91878da
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052403-postcard-ferocious-b503@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b04f0d89e880bc2cca6a5c73cf287082c91878da Mon Sep 17 00:00:00 2001
From: Gabor Juhos <j4g8y7(a)gmail.com>
Date: Fri, 9 May 2025 15:48:52 +0200
Subject: [PATCH] arm64: dts: marvell: uDPU: define pinctrl state for alarm
LEDs
The two alarm LEDs of on the uDPU board are stopped working since
commit 78efa53e715e ("leds: Init leds class earlier").
The LEDs are driven by the GPIO{15,16} pins of the North Bridge
GPIO controller. These pins are part of the 'spi_quad' pin group
for which the 'spi' function is selected via the default pinctrl
state of the 'spi' node. This is wrong however, since in order to
allow controlling the LEDs, the pins should use the 'gpio' function.
Before the commit mentined above, the 'spi' function is selected
first by the pinctrl core before probing the spi driver, but then
it gets overridden to 'gpio' implicitly via the
devm_gpiod_get_index_optional() call from the 'leds-gpio' driver.
After the commit, the LED subsystem gets initialized before the
SPI subsystem, so the function of the pin group remains 'spi'
which in turn prevents controlling of the LEDs.
Despite the change of the initialization order, the root cause is
that the pinctrl state definition is wrong since its initial commit
0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"),
To fix the problem, override the function in the 'spi_quad_pins'
node to 'gpio' and move the pinctrl state definition from the
'spi' node into the 'leds' node.
Cc: stable(a)vger.kernel.org # needs adjustment for < 6.1
Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement(a)bootlin.com>
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3a9b6907185d..242820845707 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@ memory@0 {
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@ &sdhci0 {
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@ partition@180000 {
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b04f0d89e880bc2cca6a5c73cf287082c91878da
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052402-pantomime-relative-1605@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b04f0d89e880bc2cca6a5c73cf287082c91878da Mon Sep 17 00:00:00 2001
From: Gabor Juhos <j4g8y7(a)gmail.com>
Date: Fri, 9 May 2025 15:48:52 +0200
Subject: [PATCH] arm64: dts: marvell: uDPU: define pinctrl state for alarm
LEDs
The two alarm LEDs of on the uDPU board are stopped working since
commit 78efa53e715e ("leds: Init leds class earlier").
The LEDs are driven by the GPIO{15,16} pins of the North Bridge
GPIO controller. These pins are part of the 'spi_quad' pin group
for which the 'spi' function is selected via the default pinctrl
state of the 'spi' node. This is wrong however, since in order to
allow controlling the LEDs, the pins should use the 'gpio' function.
Before the commit mentined above, the 'spi' function is selected
first by the pinctrl core before probing the spi driver, but then
it gets overridden to 'gpio' implicitly via the
devm_gpiod_get_index_optional() call from the 'leds-gpio' driver.
After the commit, the LED subsystem gets initialized before the
SPI subsystem, so the function of the pin group remains 'spi'
which in turn prevents controlling of the LEDs.
Despite the change of the initialization order, the root cause is
that the pinctrl state definition is wrong since its initial commit
0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"),
To fix the problem, override the function in the 'spi_quad_pins'
node to 'gpio' and move the pinctrl state definition from the
'spi' node into the 'leds' node.
Cc: stable(a)vger.kernel.org # needs adjustment for < 6.1
Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement(a)bootlin.com>
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3a9b6907185d..242820845707 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@ memory@0 {
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@ &sdhci0 {
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@ partition@180000 {
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b04f0d89e880bc2cca6a5c73cf287082c91878da
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052401-unbiased-designate-2089@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b04f0d89e880bc2cca6a5c73cf287082c91878da Mon Sep 17 00:00:00 2001
From: Gabor Juhos <j4g8y7(a)gmail.com>
Date: Fri, 9 May 2025 15:48:52 +0200
Subject: [PATCH] arm64: dts: marvell: uDPU: define pinctrl state for alarm
LEDs
The two alarm LEDs of on the uDPU board are stopped working since
commit 78efa53e715e ("leds: Init leds class earlier").
The LEDs are driven by the GPIO{15,16} pins of the North Bridge
GPIO controller. These pins are part of the 'spi_quad' pin group
for which the 'spi' function is selected via the default pinctrl
state of the 'spi' node. This is wrong however, since in order to
allow controlling the LEDs, the pins should use the 'gpio' function.
Before the commit mentined above, the 'spi' function is selected
first by the pinctrl core before probing the spi driver, but then
it gets overridden to 'gpio' implicitly via the
devm_gpiod_get_index_optional() call from the 'leds-gpio' driver.
After the commit, the LED subsystem gets initialized before the
SPI subsystem, so the function of the pin group remains 'spi'
which in turn prevents controlling of the LEDs.
Despite the change of the initialization order, the root cause is
that the pinctrl state definition is wrong since its initial commit
0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"),
To fix the problem, override the function in the 'spi_quad_pins'
node to 'gpio' and move the pinctrl state definition from the
'spi' node into the 'leds' node.
Cc: stable(a)vger.kernel.org # needs adjustment for < 6.1
Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement(a)bootlin.com>
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3a9b6907185d..242820845707 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@ memory@0 {
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@ &sdhci0 {
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@ partition@180000 {
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b04f0d89e880bc2cca6a5c73cf287082c91878da
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052400-sneer-rendering-46c3@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b04f0d89e880bc2cca6a5c73cf287082c91878da Mon Sep 17 00:00:00 2001
From: Gabor Juhos <j4g8y7(a)gmail.com>
Date: Fri, 9 May 2025 15:48:52 +0200
Subject: [PATCH] arm64: dts: marvell: uDPU: define pinctrl state for alarm
LEDs
The two alarm LEDs of on the uDPU board are stopped working since
commit 78efa53e715e ("leds: Init leds class earlier").
The LEDs are driven by the GPIO{15,16} pins of the North Bridge
GPIO controller. These pins are part of the 'spi_quad' pin group
for which the 'spi' function is selected via the default pinctrl
state of the 'spi' node. This is wrong however, since in order to
allow controlling the LEDs, the pins should use the 'gpio' function.
Before the commit mentined above, the 'spi' function is selected
first by the pinctrl core before probing the spi driver, but then
it gets overridden to 'gpio' implicitly via the
devm_gpiod_get_index_optional() call from the 'leds-gpio' driver.
After the commit, the LED subsystem gets initialized before the
SPI subsystem, so the function of the pin group remains 'spi'
which in turn prevents controlling of the LEDs.
Despite the change of the initialization order, the root cause is
that the pinctrl state definition is wrong since its initial commit
0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"),
To fix the problem, override the function in the 'spi_quad_pins'
node to 'gpio' and move the pinctrl state definition from the
'spi' node into the 'leds' node.
Cc: stable(a)vger.kernel.org # needs adjustment for < 6.1
Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement(a)bootlin.com>
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3a9b6907185d..242820845707 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@ memory@0 {
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@ &sdhci0 {
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@ partition@180000 {
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052400-culinary-streak-b400@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Thu, 8 May 2025 09:29:23 +0300
Subject: [PATCH] pmdomain: core: Fix error checking in
genpd_dev_pm_attach_by_id()
The error checking for of_count_phandle_with_args() does not handle
negative error codes correctly. The problem is that "index" is a u32 so
in the condition "if (index >= num_domains)" negative error codes stored
in "num_domains" are type promoted to very high positive values and
"index" is always going to be valid.
Test for negative error codes first and then test if "index" is valid.
Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..d6c1ddb807b2 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052459-bully-agreeing-45f1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Thu, 8 May 2025 09:29:23 +0300
Subject: [PATCH] pmdomain: core: Fix error checking in
genpd_dev_pm_attach_by_id()
The error checking for of_count_phandle_with_args() does not handle
negative error codes correctly. The problem is that "index" is a u32 so
in the condition "if (index >= num_domains)" negative error codes stored
in "num_domains" are type promoted to very high positive values and
"index" is always going to be valid.
Test for negative error codes first and then test if "index" is valid.
Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..d6c1ddb807b2 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052459-sleeve-rearrange-e01c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Thu, 8 May 2025 09:29:23 +0300
Subject: [PATCH] pmdomain: core: Fix error checking in
genpd_dev_pm_attach_by_id()
The error checking for of_count_phandle_with_args() does not handle
negative error codes correctly. The problem is that "index" is a u32 so
in the condition "if (index >= num_domains)" negative error codes stored
in "num_domains" are type promoted to very high positive values and
"index" is always going to be valid.
Test for negative error codes first and then test if "index" is valid.
Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..d6c1ddb807b2 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052458-fool-conflict-1a61@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Thu, 8 May 2025 09:29:23 +0300
Subject: [PATCH] pmdomain: core: Fix error checking in
genpd_dev_pm_attach_by_id()
The error checking for of_count_phandle_with_args() does not handle
negative error codes correctly. The problem is that "index" is a u32 so
in the condition "if (index >= num_domains)" negative error codes stored
in "num_domains" are type promoted to very high positive values and
"index" is always going to be valid.
Test for negative error codes first and then test if "index" is valid.
Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..d6c1ddb807b2 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052458-boogeyman-busily-acd9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Thu, 8 May 2025 09:29:23 +0300
Subject: [PATCH] pmdomain: core: Fix error checking in
genpd_dev_pm_attach_by_id()
The error checking for of_count_phandle_with_args() does not handle
negative error codes correctly. The problem is that "index" is a u32 so
in the condition "if (index >= num_domains)" negative error codes stored
in "num_domains" are type promoted to very high positive values and
"index" is always going to be valid.
Test for negative error codes first and then test if "index" is valid.
Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 9b2f28b34bb5..d6c1ddb807b2 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
/* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (index >= num_domains)
+ if (num_domains < 0 || index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 43f0999af011fba646e015f0bb08b6c3002a0170
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052427-entrust-arousal-0ad5@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <ronak.doshi(a)broadcom.com>
Date: Thu, 15 May 2025 19:04:56 +0000
Subject: [PATCH] vmxnet3: update MTU after device quiesce
Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces
the device and then reactivates it for the ESXi to know about the new mtu.
So, technically the OS stack can start using the new mtu before ESXi knows
about the new mtu.
This can lead to issues for TSO packets which use mss as per the new mtu
configured. This patch fixes this issue by moving the mtu write after
device quiesce.
Cc: stable(a)vger.kernel.org
Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3")
Signed-off-by: Ronak Doshi <ronak.doshi(a)broadcom.com>
Acked-by: Guolin Yang <guolin.yang(a)broadcom.com>
Changes v1-> v2:
Moved MTU write after destroy of rx rings
Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..c676979c7ab9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 43f0999af011fba646e015f0bb08b6c3002a0170
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052426-pleading-slip-af7c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <ronak.doshi(a)broadcom.com>
Date: Thu, 15 May 2025 19:04:56 +0000
Subject: [PATCH] vmxnet3: update MTU after device quiesce
Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces
the device and then reactivates it for the ESXi to know about the new mtu.
So, technically the OS stack can start using the new mtu before ESXi knows
about the new mtu.
This can lead to issues for TSO packets which use mss as per the new mtu
configured. This patch fixes this issue by moving the mtu write after
device quiesce.
Cc: stable(a)vger.kernel.org
Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3")
Signed-off-by: Ronak Doshi <ronak.doshi(a)broadcom.com>
Acked-by: Guolin Yang <guolin.yang(a)broadcom.com>
Changes v1-> v2:
Moved MTU write after destroy of rx rings
Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..c676979c7ab9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 43f0999af011fba646e015f0bb08b6c3002a0170
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052425-posture-finlike-af9b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <ronak.doshi(a)broadcom.com>
Date: Thu, 15 May 2025 19:04:56 +0000
Subject: [PATCH] vmxnet3: update MTU after device quiesce
Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces
the device and then reactivates it for the ESXi to know about the new mtu.
So, technically the OS stack can start using the new mtu before ESXi knows
about the new mtu.
This can lead to issues for TSO packets which use mss as per the new mtu
configured. This patch fixes this issue by moving the mtu write after
device quiesce.
Cc: stable(a)vger.kernel.org
Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3")
Signed-off-by: Ronak Doshi <ronak.doshi(a)broadcom.com>
Acked-by: Guolin Yang <guolin.yang(a)broadcom.com>
Changes v1-> v2:
Moved MTU write after destroy of rx rings
Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..c676979c7ab9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 43f0999af011fba646e015f0bb08b6c3002a0170
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052425-willed-landline-ff5b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <ronak.doshi(a)broadcom.com>
Date: Thu, 15 May 2025 19:04:56 +0000
Subject: [PATCH] vmxnet3: update MTU after device quiesce
Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces
the device and then reactivates it for the ESXi to know about the new mtu.
So, technically the OS stack can start using the new mtu before ESXi knows
about the new mtu.
This can lead to issues for TSO packets which use mss as per the new mtu
configured. This patch fixes this issue by moving the mtu write after
device quiesce.
Cc: stable(a)vger.kernel.org
Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3")
Signed-off-by: Ronak Doshi <ronak.doshi(a)broadcom.com>
Acked-by: Guolin Yang <guolin.yang(a)broadcom.com>
Changes v1-> v2:
Moved MTU write after destroy of rx rings
Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..c676979c7ab9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 43f0999af011fba646e015f0bb08b6c3002a0170
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052424-banjo-exorcist-8407@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <ronak.doshi(a)broadcom.com>
Date: Thu, 15 May 2025 19:04:56 +0000
Subject: [PATCH] vmxnet3: update MTU after device quiesce
Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces
the device and then reactivates it for the ESXi to know about the new mtu.
So, technically the OS stack can start using the new mtu before ESXi knows
about the new mtu.
This can lead to issues for TSO packets which use mss as per the new mtu
configured. This patch fixes this issue by moving the mtu write after
device quiesce.
Cc: stable(a)vger.kernel.org
Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3")
Signed-off-by: Ronak Doshi <ronak.doshi(a)broadcom.com>
Acked-by: Guolin Yang <guolin.yang(a)broadcom.com>
Changes v1-> v2:
Moved MTU write after destroy of rx rings
Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3df6aabc7e33..c676979c7ab9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
int err = 0;
- WRITE_ONCE(netdev->mtu, new_mtu);
-
/*
* Reset_work may be in the middle of resetting the device, wait for its
* completion.
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
/* we need to re-create the rx queue based on the new mtu */
vmxnet3_rq_destroy_all(adapter);
+ WRITE_ONCE(netdev->mtu, new_mtu);
vmxnet3_adjust_rx_ring_size(adapter);
err = vmxnet3_rq_create_all(adapter);
if (err) {
@@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
"Closing it\n", err);
goto out;
}
+ } else {
+ WRITE_ONCE(netdev->mtu, new_mtu);
}
out:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052431-scorer-rebuild-b36a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052431-padding-scalping-8aae@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052430-aching-oval-a807@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052430-fable-scratch-0fa5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052430-reoccupy-unbroken-8b7d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052429-flame-quit-a059@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x 7150d57c370f9e61b7d0e82c58002f1c5a205ac4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052429-paging-request-9169@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding(a)opensource.cirrus.com>
Date: Tue, 20 May 2025 13:47:43 +0100
Subject: [PATCH] ALSA: hda/realtek: Add support for HP Agusta using CS35L41
HDA
Add support for HP Agusta.
Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C
Signed-off-by: Stefan Binding <sbinding(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.…
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a426d9882702..69788dd9a1ec 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052421-document-satirical-8e58@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052421-wildness-crease-7ddd@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052421-diffusion-untimely-a099@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052415-fang-botanist-0180@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052415-poncho-unisexual-124a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052414-rubbed-footing-28ca@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f709b78aecab519dbcefa9a6603b94ad18c553e3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052414-bulge-curse-c065@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu(a)canonical.com>
Date: Tue, 20 May 2025 21:21:01 +0800
Subject: [PATCH] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led
fixup
New HP ZBook with Realtek HDA codec ALC3247 needs the quirk
ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED.
Signed-off-by: Chris Chiu <chris.chiu(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69788dd9a1ec..20ab1fb2195f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A),
+ SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
Hello Dear,
I hope this message finds you well, and let me extend my warmest wishes for a joyful, healthy, and prosperous 2025.
My name is Widad Babikar Omar, and a politician/investor from Sudan. For some time now, I’ve been looking for a trustworthy individual or a company to guide me in placing a huge private investment funds into strong, growing business environments — and your region truly stood out to me as full of potential and a viable option.
I’m interested in exploring a discreet, long-term investment opportunities outside of Sudan in any of these key sectors due to my background:
- Real estate and Construction
- Oil & Gas
- Renewable energy
- Manufacturing and Mining
- Healthcare
- Tech & innovation
- Education and Agriculture
My goal is to build a partnership based on trust, clarity, and mutual benefit. If you’re open to a conversation, I’d be glad to share more details and discuss how we might work together.
Thank you for considering this opportunity. I understand this might be a significant decision, and I’ll fully respect whatever response you choose to give.
Looking forward to hearing from you soon.
Warm regards,
Widad Babikar Omar
The RK3588 GPU power domain cannot be activated unless the external
power regulator is already on. When GPU support was added to this DT,
we had no way to represent this requirement, so `regulator-always-on`
was added to the `vdd_gpu_s0` regulator in order to ensure stability.
A later patch series (see "Fixes:" commit) resolved this shortcoming,
but that commit left the workaround -- and rendered the comment above
it no longer correct.
Remove the workaround to allow the GPU power regulator to power off, now
that the DT includes the necessary information to power it back on
correctly.
Fixes: f94500eb7328b ("arm64: dts: rockchip: Add GPU power domain regulator dependency for RK3588")
Signed-off-by: Sam Edwards <CFSworks(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
---
arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
index 60ad272982ad..6daea8961fdd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
@@ -398,17 +398,6 @@ rk806_dvs3_null: dvs3-null-pins {
regulators {
vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 {
- /*
- * RK3588's GPU power domain cannot be enabled
- * without this regulator active, but it
- * doesn't have to be on when the GPU PD is
- * disabled. Because the PD binding does not
- * currently allow us to express this
- * relationship, we have no choice but to do
- * this instead:
- */
- regulator-always-on;
-
regulator-boot-on;
regulator-min-microvolt = <550000>;
regulator-max-microvolt = <950000>;
--
2.48.1
The patch titled
Subject: mm: fix copy_vma() error handling for hugetlb mappings
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-fix-copy_vma-error-handling-for-hugetlb-mappings.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Subject: mm: fix copy_vma() error handling for hugetlb mappings
Date: Fri, 23 May 2025 14:19:10 +0200
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma. This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy. As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter. This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.
The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1]. It's also a possible duplicate of public
syzbot report [2]. The WARNING report is:
============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
<TASK>
page_counter_uncharge+0x33/0x80
hugetlb_cgroup_uncharge_counter+0xcb/0x120
hugetlb_vm_op_close+0x579/0x960
? __pfx_hugetlb_vm_op_close+0x10/0x10
remove_vma+0x88/0x130
exit_mmap+0x71e/0xe00
? __pfx_exit_mmap+0x10/0x10
? __mutex_unlock_slowpath+0x22e/0x7f0
? __pfx_exit_aio+0x10/0x10
? __up_read+0x256/0x690
? uprobe_clear_state+0x274/0x290
? mm_update_next_owner+0xa9/0x810
__mmput+0xc9/0x370
exit_mm+0x203/0x2f0
? __pfx_exit_mm+0x10/0x10
? taskstats_exit+0x32b/0xa60
do_exit+0x921/0x2740
? do_raw_spin_lock+0x155/0x3b0
? __pfx_do_exit+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? _raw_spin_lock_irq+0xc5/0x100
do_group_exit+0x20c/0x2c0
get_signal+0x168c/0x1720
? __pfx_get_signal+0x10/0x10
? schedule+0x165/0x360
arch_do_signal_or_restart+0x8e/0x7d0
? __pfx_arch_do_signal_or_restart+0x10/0x10
? __pfx___se_sys_futex+0x10/0x10
syscall_exit_to_user_mode+0xb8/0x2c0
do_syscall_64+0x75/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
</TASK>
============================================================
Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df…
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 16 +++++++++++++++-
mm/mremap.c | 3 +--
mm/vma.c | 1 +
4 files changed, 22 insertions(+), 3 deletions(-)
--- a/include/linux/hugetlb.h~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(s
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
--- a/mm/hugetlb.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/hugetlb.c
@@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_a
/*
* Reset and decrement one ref on hugepage private reservation.
* Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
*/
@@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
ALIGN_DOWN(vma->vm_end, PUD_SIZE));
}
+
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ clear_vma_resv_huge_pages(vma);
+}
--- a/mm/mremap.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/mremap.c
@@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_
mremap_userfaultfd_prep(new_vma, vrm->uf);
}
- if (is_vm_hugetlb_page(vma))
- clear_vma_resv_huge_pages(vma);
+ fixup_hugetlb_reservations(vma);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
--- a/mm/vma.c~mm-fix-copy_vma-error-handling-for-hugetlb-mappings
+++ a/mm/vma.c
@@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct v
return new_vma;
out_vma_link:
+ fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
_
Patches currently in -mm which might be from rcn(a)igalia.com are
mm-fix-copy_vma-error-handling-for-hugetlb-mappings.patch
The patch titled
Subject: memcg: always call cond_resched() after fn()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
memcg-always-call-cond_resched-after-fn.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Breno Leitao <leitao(a)debian.org>
Subject: memcg: always call cond_resched() after fn()
Date: Fri, 23 May 2025 10:21:06 -0700
I am seeing soft lockup on certain machine types when a cgroup OOMs. This
is happening because killing the process in certain machine might be very
slow, which causes the soft lockup and RCU stalls. This happens usually
when the cgroup has MANY processes and memory.oom.group is set.
Example I am seeing in real production:
[462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) ....
....
[462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) ....
[462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982]
....
Quick look at why this is so slow, it seems to be related to serial flush
for certain machine types. For all the crashes I saw, the target CPU was
at console_flush_all().
In the case above, there are thousands of processes in the cgroup, and it
is soft locking up before it reaches the 1024 limit in the code (which
would call the cond_resched()). So, cond_resched() in 1024 blocks is not
sufficient.
Remove the counter-based conditional rescheduling logic and call
cond_resched() unconditionally after each task iteration, after fn() is
called. This avoids the lockup independently of how slow fn() is.
Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org
Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process")
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Suggested-by: Rik van Riel <riel(a)surriel.com>
Acked-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Michael van der Westhuizen <rmikey(a)meta.com>
Cc: Usama Arif <usamaarif642(a)gmail.com>
Cc: Pavel Begunkov <asml.silence(a)gmail.com>
Cc: Chen Ridong <chenridong(a)huawei.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/mm/memcontrol.c~memcg-always-call-cond_resched-after-fn
+++ a/mm/memcontrol.c
@@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cg
{
struct mem_cgroup *iter;
int ret = 0;
- int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cg
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
while (!ret && (task = css_task_iter_next(&it))) {
- /* Avoid potential softlockup warning */
- if ((++i & 1023) == 0)
- cond_resched();
ret = fn(task, arg);
+ /* Avoid potential softlockup warning */
+ cond_resched();
}
css_task_iter_end(&it);
if (ret) {
_
Patches currently in -mm which might be from leitao(a)debian.org are
memcg-always-call-cond_resched-after-fn.patch
The breaking commit has been present since v6.4.
Although the fix is present in v6.15-rc4, v6.12.26 and v6.14.5
it is still missing in 6.6.y stable tree.
Brett Creeley (1):
pds_core: Prevent possible adminq overflow/stuck condition
drivers/net/ethernet/amd/pds_core/core.c | 5 +----
drivers/net/ethernet/amd/pds_core/core.h | 2 +-
2 files changed, 2 insertions(+), 5 deletions(-)
--
2.43.0
From: Xin Li <xin(a)zytor.com>
Clear the software event flag in the augmented SS to prevent infinite
SIGTRAP handler loop if TF is used without an external debugger.
Following is a typical single-stepping flow for a user process:
1) The user process is prepared for single-stepping by setting
RFLAGS.TF = 1.
2) When any instruction in user space completes, a #DB is triggered.
3) The kernel handles the #DB and returns to user space, invoking the
SIGTRAP handler with RFLAGS.TF = 0.
4) After the SIGTRAP handler finishes, the user process performs a
sigreturn syscall, restoring the original state, including
RFLAGS.TF = 1.
5) Goto step 2.
According to the FRED specification:
A) Bit 17 in the augmented SS is designated as the software event
flag, which is set to 1 for FRED event delivery of SYSCALL,
SYSENTER, or INT n.
B) If bit 17 of the augmented SS is 1 and ERETU would result in
RFLAGS.TF = 1, a single-step trap will be pending upon completion
of ERETU.
In step 4) above, the software event flag is set upon the sigreturn
syscall, and its corresponding ERETU would restore RFLAGS.TF = 1.
This combination causes a pending single-step trap upon completion of
ERETU. Therefore, another #DB is triggered before any user space
instruction is executed, which leads to an infinite loop in which the
SIGTRAP handler keeps being invoked on the same user space IP.
Suggested-by: H. Peter Anvin (Intel) <hpa(a)zytor.com>
Signed-off-by: Xin Li (Intel) <xin(a)zytor.com>
Cc: stable(a)vger.kernel.org
---
Change in v2:
*) Remove the check cpu_feature_enabled(X86_FEATURE_FRED), because
regs->fred_ss.swevent will always be 0 otherwise (H. Peter Anvin).
---
arch/x86/include/asm/sighandling.h | 19 +++++++++++++++++++
arch/x86/kernel/signal_32.c | 4 ++++
arch/x86/kernel/signal_64.c | 4 ++++
3 files changed, 27 insertions(+)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..637f7705f0b2 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,23 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent infinite SIGTRAP handler loop if TF is used without an external
+ * debugger, clear the software event flag in the augmented SS, ensuring no
+ * single-step trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original state
+ * is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+ if (IS_ENABLED(CONFIG_X86_FRED) && !(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
base-commit: 6a7c3c2606105a41dde81002c0037420bc1ddf00
--
2.49.0
On linux-5.10.y, my testcase run failed:
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mount -t nfs [::1]:/mnt/nfs_root /mnt/v6 -o nfsvers=3
mount.nfs: requested NFS version or transport protocol is not supported
The first bad commit is:
commit 7229200f68662660bb4d55f19247eaf3c79a4217
Author: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon Jun 3 10:35:02 2024 -0400
nfsd: don't allow nfsd threads to be signalled.
[ Upstream commit 3903902401451b1cd9d797a8c79769eb26ac7fe5 ]
Here is the test log:
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# dd if=/dev/zero of=/tmp/nfs.img bs=1M count=100
100+0 records in
100+0 records out
104857600 bytes (105 MB, 100 MiB) copied, 0.0386658 s, 2.7 GB/s
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mkfs /tmp/nfs.img
mke2fs 1.46.1 (9-Feb-2021)
Discarding device blocks: 1024/102400 done
Creating filesystem with 102400 1k blocks and 25688 inodes
Filesystem UUID: 77e3bc56-46bb-4e5c-9619-d9a0c0999958
Superblock backups stored on blocks:
8193, 24577, 40961, 57345, 73729
Allocating group tables: 0/13 done
Writing inode tables: 0/13 done
Writing superblocks and filesystem accounting information: 0/13 done
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mount /tmp/nfs.img /mnt
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mkdir /mnt/nfs_root
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# touch /etc/exports
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# echo '/mnt/nfs_root *(insecure,rw,async,no_root_squash)' >> /etc/exports
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# /opt/wr-test/bin/svcwp.sh nfsserver restart
stopping mountd: done
stopping nfsd: ..........failed
using signal 9:
..........failed
exportfs: /etc/exports [1]: Neither 'subtree_check' or 'no_subtree_check' specified for export "*:/mnt/nfs_root".
Assuming default behaviour ('no_subtree_check').
NOTE: this default has changed since nfs-utils version 1.0.x
starting 8 nfsd kernel threads: done
starting mountd: done
exportfs: /etc/exports [1]: Neither 'subtree_check' or 'no_subtree_check' specified for export "*:/mnt/nfs_root".
Assuming default behaviour ('no_subtree_check').
NOTE: this default has changed since nfs-utils version 1.0.x
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# echo hello > /mnt/nfs_root/hello.txt
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mkdir /mnt/v6
root@intel-x86-64:/opt/wr-test/testcases/userspace/nfs-utils_v6# mount -t nfs [::1]:/mnt/nfs_root /mnt/v6 -o nfsvers=3
mount.nfs: requested NFS version or transport protocol is not supported
Thanks,
Haixiao
From: Maud Spierings <maudspierings(a)gocontroll.com>
Throughout the various probe functions &indio_dev->dev is used before it
is initialized. This caused a kernel panic in st_sensors_power_enable
when the call to devm_regulator_bulk_get_enable() fails and then calls
dev_err_probe() with the uninitialized device.
This seems to only cause a panic with dev_err_probe(), dev_err,
dev_warn and dev_info don't seem to cause a panic, but are fixed
as well.
Signed-off-by: Maud Spierings <maudspierings(a)gocontroll.com>
---
When I search for general &indio_dev->dev usage, I see quite a lot more
hits, but I am not sure if there are issues with those too.
This issue has existed for a long time it seems and therefore it is
nearly impossible to find a proper fixes tag. I would love to see it at
least backported to 6.12 as that is where I encountered it, and I
believe the patch should apply without conflicts.
The investigation into this issue can be found in this thread [1]
[1]: https://lore.kernel.org/all/AM7P189MB100986A83D2F28AF3FFAF976E39EA@AM7P189M…
---
Changes in v2:
- Added SoB in commit message
- Link to v1: https://lore.kernel.org/r/20250522-st_iio_fix-v1-1-d689b35f1612@gocontroll.…
---
drivers/iio/accel/st_accel_core.c | 10 +++----
drivers/iio/common/st_sensors/st_sensors_core.c | 35 +++++++++++-----------
drivers/iio/common/st_sensors/st_sensors_trigger.c | 18 +++++------
3 files changed, 31 insertions(+), 32 deletions(-)
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 99cb661fabb2d9cc1943fa8d0a6f3becb71126e6..a7961c610ed203d039bbf298c8883031a578fb0b 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
union acpi_object *ont;
union acpi_object *elements;
acpi_status status;
+ struct device *parent = indio_dev->dev.parent;
int ret = -EINVAL;
unsigned int val;
int i, j;
@@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
};
- adev = ACPI_COMPANION(indio_dev->dev.parent);
+ adev = ACPI_COMPANION(parent);
if (!adev)
return -ENXIO;
@@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
if (status == AE_NOT_FOUND) {
return -ENXIO;
} else if (ACPI_FAILURE(status)) {
- dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
- status);
+ dev_warn(parent, "failed to execute _ONT: %d\n", status);
return status;
}
@@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
}
ret = 0;
- dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+ dev_info(parent, "computed mount matrix from ACPI\n");
out:
kfree(buffer.pointer);
if (ret)
- dev_dbg(&indio_dev->dev,
+ dev_dbg(parent,
"failed to apply ACPI orientation data: %d\n", ret);
return ret;
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8ce1dccfea4f5aaff45d3d40f6542323dd1f0b09..11cbf561b16d41f429745abb516c137cfbb302bb 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
return err;
st_accel_set_fullscale_error:
- dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+ dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n");
return err;
}
@@ -231,7 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
ARRAY_SIZE(regulator_names),
regulator_names);
if (err)
- return dev_err_probe(&indio_dev->dev, err,
+ return dev_err_probe(parent, err,
"unable to enable supplies\n");
return 0;
@@ -241,13 +241,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS");
static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
/* Sensor does not support interrupts */
if (!sdata->sensor_settings->drdy_irq.int1.addr &&
!sdata->sensor_settings->drdy_irq.int2.addr) {
if (pdata->drdy_int_pin)
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"DRDY on pin INT%d specified, but sensor does not support interrupts\n",
pdata->drdy_int_pin);
return 0;
@@ -256,29 +257,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
switch (pdata->drdy_int_pin) {
case 1:
if (!sdata->sensor_settings->drdy_irq.int1.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT1 not available.\n");
+ dev_err(parent, "DRDY on INT1 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 1;
break;
case 2:
if (!sdata->sensor_settings->drdy_irq.int2.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT2 not available.\n");
+ dev_err(parent, "DRDY on INT2 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 2;
break;
default:
- dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n");
+ dev_err(parent, "DRDY on pdata not valid.\n");
return -EINVAL;
}
if (pdata->open_drain) {
if (!sdata->sensor_settings->drdy_irq.int1.addr_od &&
!sdata->sensor_settings->drdy_irq.int2.addr_od)
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"open drain requested but unsupported.\n");
else
sdata->int_pin_open_drain = true;
@@ -336,6 +335,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS");
int st_sensors_init_sensor(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
struct st_sensors_platform_data *of_pdata;
int err = 0;
@@ -343,7 +343,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mutex_init(&sdata->odr_lock);
/* If OF/DT pdata exists, it will take precedence of anything else */
- of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ of_pdata = st_sensors_dev_probe(parent, pdata);
if (IS_ERR(of_pdata))
return PTR_ERR(of_pdata);
if (of_pdata)
@@ -370,7 +370,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
if (err < 0)
return err;
} else
- dev_info(&indio_dev->dev, "Full-scale not possible\n");
+ dev_info(parent, "Full-scale not possible\n");
err = st_sensors_set_odr(indio_dev, sdata->odr);
if (err < 0)
@@ -405,7 +405,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mask = sdata->sensor_settings->drdy_irq.int2.mask_od;
}
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"set interrupt line to open drain mode on pin %d\n",
sdata->drdy_int_pin);
err = st_sensors_write_data_with_mask(indio_dev, addr,
@@ -593,21 +593,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS");
int st_sensors_verify_id(struct iio_dev *indio_dev)
{
struct st_sensor_data *sdata = iio_priv(indio_dev);
+ struct device *parent = indio_dev->dev.parent;
int wai, err;
if (sdata->sensor_settings->wai_addr) {
err = regmap_read(sdata->regmap,
sdata->sensor_settings->wai_addr, &wai);
if (err < 0) {
- dev_err(&indio_dev->dev,
- "failed to read Who-Am-I register.\n");
- return err;
+ return dev_err_probe(parent, err,
+ "failed to read Who-Am-I register.\n");
}
if (sdata->sensor_settings->wai != wai) {
- dev_warn(&indio_dev->dev,
- "%s: WhoAmI mismatch (0x%x).\n",
- indio_dev->name, wai);
+ dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n",
+ indio_dev->name, wai);
}
}
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 9d4bf822a15dfcdd6c2835f6b9d7698cd3cb0b08..32c3278968089699dff5329e943d92b151b55fdf 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
indio_dev->name);
if (sdata->trig == NULL) {
- dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+ dev_err(parent, "failed to allocate iio trigger.\n");
return -ENOMEM;
}
@@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
if (!sdata->sensor_settings->drdy_irq.addr_ihl) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n");
if (irq_trig == IRQF_TRIGGER_FALLING)
irq_trig = IRQF_TRIGGER_RISING;
@@ -156,21 +156,21 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->sensor_settings->drdy_irq.mask_ihl, 1);
if (err < 0)
return err;
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the falling edge or active low level\n");
}
break;
case IRQF_TRIGGER_RISING:
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the rising edge\n");
break;
case IRQF_TRIGGER_HIGH:
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts active high level\n");
break;
default:
/* This is the most preferred mode, if possible */
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig);
irq_trig = IRQF_TRIGGER_RISING;
}
@@ -179,7 +179,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
if (irq_trig == IRQF_TRIGGER_FALLING ||
irq_trig == IRQF_TRIGGER_RISING) {
if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"edge IRQ not supported w/o stat register.\n");
return -EOPNOTSUPP;
}
@@ -214,13 +214,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig->name,
sdata->trig);
if (err) {
- dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
+ dev_err(parent, "failed to request trigger IRQ.\n");
return err;
}
err = devm_iio_trigger_register(parent, sdata->trig);
if (err < 0) {
- dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+ dev_err(parent, "failed to register iio trigger.\n");
return err;
}
indio_dev->trig = iio_trigger_get(sdata->trig);
---
base-commit: 7bac2c97af4078d7a627500c9bcdd5b033f97718
change-id: 20250522-st_iio_fix-1c58fdd4d420
Best regards,
--
Maud Spierings <maudspierings(a)gocontroll.com>
From: Michael Kelley <mhklinux(a)outlook.com>
The Hyper-V host provides guest VMs with a range of MMIO addresses
that guest VMBus drivers can use. The VMBus driver in Linux manages
that MMIO space, and allocates portions to drivers upon request. As
part of managing that MMIO space in a Generation 2 VM, the VMBus
driver must reserve the portion of the MMIO space that Hyper-V has
designated for the synthetic frame buffer, and not allocate this
space to VMBus drivers other than graphics framebuffer drivers. The
synthetic frame buffer MMIO area is described by the screen_info data
structure that is passed to the Linux kernel at boot time, so the
VMBus driver must access screen_info for Generation 2 VMs. (In
Generation 1 VMs, the framebuffer MMIO space is communicated to
the guest via a PCI pseudo-device, and access to screen_info is
not needed.)
In commit a07b50d80ab6 ("hyperv: avoid dependency on screen_info")
the VMBus driver's access to screen_info is restricted to when
CONFIG_SYSFB is enabled. CONFIG_SYSFB is typically enabled in kernels
built for Hyper-V by virtue of having at least one of CONFIG_FB_EFI,
CONFIG_FB_VESA, or CONFIG_SYSFB_SIMPLEFB enabled, so the restriction
doesn't usually affect anything. But it's valid to have none of these
enabled, in which case CONFIG_SYSFB is not enabled, and the VMBus driver
is unable to properly reserve the framebuffer MMIO space for graphics
framebuffer drivers. The framebuffer MMIO space may be assigned to
some other VMBus driver, with undefined results. As an example, if
a VM is using a PCI pass-thru NVMe controller to host the OS disk,
the PCI NVMe controller is probed before any graphics devices, and the
NVMe controller is assigned a portion of the framebuffer MMIO space.
Hyper-V reports an error to Linux during the probe, and the OS disk
fails to get setup. Then Linux fails to boot in the VM.
Fix this by having CONFIG_HYPERV always select SYSFB. Then the
VMBus driver in a Gen 2 VM can always reserve the MMIO space for the
graphics framebuffer driver, and prevent the undefined behavior. But
don't select SYSFB when building for HYPERV_VTL_MODE as VTLs other
than VTL 0 don't have a framebuffer and aren't subject to the issue.
Adding SYSFB in such cases is harmless, but would increase the image
size for no purpose.
Fixes: a07b50d80ab6 ("hyperv: avoid dependency on screen_info")
Signed-off-by: Michael Kelley <mhklinux(a)outlook.com>
Reviewed-by: Saurabh Sengar <ssengar(a)linux.microsoft.com>
---
Changes in v2:
* Made "select SYSFB" conditional on not being a build for
VTL mode (Saurabh Sengar)
drivers/hv/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index eefa0b559b73..1cd188b73b74 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -9,6 +9,7 @@ config HYPERV
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
select OF_EARLY_FLATTREE if OF
+ select SYSFB if !HYPERV_VTL_MODE
help
Select this option to run Linux as a Hyper-V client operating
system.
--
2.25.1
Good Day,
How are you?
My name is Calib Cassim from Eskom Holdings Ltd. SA. I got your email from
my personal search.
I have in my position an (over-invoice / over-estimated) contract amount
executed by a Foreign Contractor through my Department, which the
contractor has been paid, and left the remaining amount of $25.5M with the
payment Management.
So, I need your help to receive the amount on my behalf as a Co-contractor,
I will obtain all the needed legal documents for the transfer to you for an
investment in your area.
If you can handle it, please reply with your phone number for more
discussions and guidelines.
Thanks,
Mr. Calib
In case hfsplus_uni2asc() is called with reused buffer there is a
possibility that the buffer contains remains of the last string and the
null character is only after that. This can and has caused problems in
functions that call hfsplus_uni2asc().
Also correct the error handling for call to copy_name() where the above
problem caused error to be not passed in hfsplus_listxattr().
Fixes: 7dcbf17e3f91 ("hfsplus: refactor copy_name to not use strncpy")
Cc: stable(a)vger.kernel.org
Cc: linux-fsdevel(a)vger.kernel.org
Signed-off-by: Aaro Mäkinen <aaro(a)tuxera.com>
Reviewed-by: Anton Altaparmakov <anton(a)tuxera.com>
---
Changes in v2:
- Add Cc tag to sign-off area
---
fs/hfsplus/unicode.c | 1 +
fs/hfsplus/xattr.c | 13 ++++++++++---
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 73342c925a4b..1f122e3c9583 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -246,6 +246,7 @@ int hfsplus_uni2asc(struct super_block *sb,
res = 0;
out:
*len_p = (char *)op - astr;
+ *op = '\0';
return res;
}
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 9a1a93e3888b..f20487ad4e8a 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -746,9 +746,16 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (size < (res + name_len(strbuf, xattr_name_len))) {
res = -ERANGE;
goto end_listxattr;
- } else
- res += copy_name(buffer + res,
- strbuf, xattr_name_len);
+ } else {
+ err = copy_name(buffer + res,
+ strbuf, xattr_name_len);
+ if (err < 0) {
+ res = err;
+ goto end_listxattr;
+ }
+ else
+ res += err;
+ }
}
if (hfs_brec_goto(&fd, 1))
--
2.43.0
The MANA driver's probe registers netdevice via the following call chain:
mana_probe()
register_netdev()
register_netdevice()
register_netdevice() calls notifier callback for netvsc driver,
holding the netdev mutex via netdev_lock_ops().
Further this netvsc notifier callback end up attempting to acquire the
same lock again in dev_xdp_propagate() leading to deadlock.
netvsc_netdev_event()
netvsc_vf_setxdp()
dev_xdp_propagate()
This deadlock was not observed so far because net_shaper_ops was never set,
and thus the lock was effectively a no-op in this case. Fix this by using
netif_xdp_propagate() instead of dev_xdp_propagate() to avoid recursive
locking in this path.
Also, clean up the unregistration path by removing the unnecessary call to
netvsc_vf_setxdp(), since unregister_netdevice_many_notify() already
performs this cleanup via dev_xdp_uninstall().
Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf")
Cc: stable(a)vger.kernel.org
Signed-off-by: Saurabh Sengar <ssengar(a)linux.microsoft.com>
Tested-by: Erni Sri Satya Vennela <ernis(a)linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz(a)microsoft.com>
---
[V2]
- Modified commit message
drivers/net/hyperv/netvsc_bpf.c | 2 +-
drivers/net/hyperv/netvsc_drv.c | 2 --
net/core/dev.c | 1 +
3 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index e01c5997a551..1dd3755d9e6d 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = dev_xdp_propagate(vf_netdev, &xdp);
+ ret = netif_xdp_propagate(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d8b169ac0343..ee3aaf9c10e6 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
- netvsc_vf_setxdp(vf_netdev, NULL);
-
reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
diff --git a/net/core/dev.c b/net/core/dev.c
index fccf2167b235..8c6c9d7fba26 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9953,6 +9953,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return dev->netdev_ops->ndo_bpf(dev, bpf);
}
+EXPORT_SYMBOL_GPL(netif_xdp_propagate);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
--
2.43.0
On Thu, May 22, 2025 at 05:30:09PM -0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> x86/relocs: Handle R_X86_64_REX_GOTPCRELX relocations
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> x86-relocs-handle-r_x86_64_rex_gotpcrelx-relocations.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit d8e603969259e50aa632d1a3fde8883f41e26150
> Author: Brian Gerst <brgerst(a)gmail.com>
> Date: Thu Jan 23 14:07:37 2025 -0500
>
> x86/relocs: Handle R_X86_64_REX_GOTPCRELX relocations
>
> [ Upstream commit cb7927fda002ca49ae62e2782c1692acc7b80c67 ]
>
> Clang may produce R_X86_64_REX_GOTPCRELX relocations when redefining the
> stack protector location. Treat them as another type of PC-relative
> relocation.
>
> Signed-off-by: Brian Gerst <brgerst(a)gmail.com>
> Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
> Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
> Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
> Link: https://lore.kernel.org/r/20250123190747.745588-6-brgerst@gmail.com
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
> index e937be979ec86..92a1e503305ef 100644
> --- a/arch/x86/tools/relocs.c
> +++ b/arch/x86/tools/relocs.c
> @@ -32,6 +32,11 @@ static struct relocs relocs32;
> static struct relocs relocs32neg;
> static struct relocs relocs64;
> # define FMT PRIu64
> +
> +#ifndef R_X86_64_REX_GOTPCRELX
> +# define R_X86_64_REX_GOTPCRELX 42
> +#endif
> +
> #else
> # define FMT PRIu32
> #endif
> @@ -227,6 +232,7 @@ static const char *rel_type(unsigned type)
> REL_TYPE(R_X86_64_PC16),
> REL_TYPE(R_X86_64_8),
> REL_TYPE(R_X86_64_PC8),
> + REL_TYPE(R_X86_64_REX_GOTPCRELX),
> #else
> REL_TYPE(R_386_NONE),
> REL_TYPE(R_386_32),
> @@ -861,6 +867,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
>
> case R_X86_64_PC32:
> case R_X86_64_PLT32:
> + case R_X86_64_REX_GOTPCRELX:
> /*
> * PC relative relocations don't need to be adjusted unless
> * referencing a percpu symbol.
Didn't Ard just say this has no purpose in stable?
https://lore.kernel.org/CAMj1kXGtasdqRPn8koNN095VEEU4K409QvieMdgGXNUK0kPgkw…
Cheers,
Nathan
Customer is reporting a really subtle issue where we get random DMAR
faults, hangs and other nasties for kernel migration jobs when stressing
stuff like s2idle/s3/s4. The explosions seems to happen somewhere
after resuming the system with splats looking something like:
PM: suspend exit
rfkill: input handler disabled
xe 0000:00:02.0: [drm] GT0: Engine reset: engine_class=bcs, logical_mask: 0x2, guc_id=0
xe 0000:00:02.0: [drm] GT0: Timedout job: seqno=24496, lrc_seqno=24496, guc_id=0, flags=0x13 in no process [-1]
xe 0000:00:02.0: [drm] GT0: Kernel-submitted job timed out
The likely cause appears to be a race between suspend cancelling the
worker that processes the free_job()'s, such that we still have pending
jobs to be freed after the cancel. Following from this, on resume the
pending_list will now contain at least one already complete job, but it
looks like we call drm_sched_resubmit_jobs(), which will then call
run_job() on everything still on the pending_list. But if the job was
already complete, then all the resources tied to the job, like the bb
itself, any memory that is being accessed, the iommu mappings etc. might
be long gone since those are usually tied to the fence signalling.
This scenario can be seen in ftrace when running a slightly modified
xe_pm (kernel was only modified to inject artificial latency into
free_job to make the race easier to hit):
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=1, guc_state=0x0, flags=0x4
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=0, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 1:0x1, gt=1, width=1, guc_id=1, guc_state=0x0, flags=0x3
xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=2, guc_state=0x0, flags=0x3
xe_exec_queue_resubmit: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13
xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ...
.....
xe_exec_queue_memory_cat_error: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x3, flags=0x13
So the job_run() is clearly triggered twice, even though the first must
have already signalled to completion during suspend. We can also see a
CAT error after the re-submit.
To prevent this try to call xe_sched_stop() to forcefully remove
anything on the pending_list that has already signalled, before we
re-submit.
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4856
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: William Tseng <william.tseng(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_gpu_scheduler.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index c250ea773491..9315da58d02d 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -51,6 +51,7 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
{
+ xe_sched_stop(sched);
drm_sched_resubmit_jobs(&sched->base);
}
--
2.49.0
If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and
opened (ie. vma_link() fails), the error is handled by closing the new
vma. This updates the hugetlbfs reservation counter of the reservation
map which at this point is referenced by both the source vma and the new
copy. As a result, once the new vma has been freed and copy_vma()
returns, the reservation counter for the source vma will be incorrect.
This patch addresses this corner case by clearing the hugetlb private
page reservation reference for the new vma and decrementing the
reference before closing the vma, so that vma_close() won't update the
reservation counter.
The issue was reported by a private syzbot instance, see the error
report log [1] and reproducer [2]. Possible duplicate of public syzbot
report [3].
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Cc: stable(a)vger.kernel.org # 6.12+
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1]
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [2]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [3]
---
mm/vma.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/vma.c b/mm/vma.c
index 839d12f02c885d3338d8d233583eb302d82bb80b..9d9f699ace977c9c869e5da5f88f12be183adcfb 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1834,6 +1834,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return new_vma;
out_vma_link:
+ if (is_vm_hugetlb_page(new_vma))
+ clear_vma_resv_huge_pages(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
---
base-commit: 94305e83eccb3120c921cd3a015cd74731140bac
change-id: 20250523-warning_in_page_counter_cancel-e8c71a6b4c88
From: Xin Li <xin(a)zytor.com>
Clear the software event flag in the augmented SS to prevent infinite
SIGTRAP handler loop if TF is used without an external debugger.
Following is a typical single-stepping flow for a user process:
1) The user process is prepared for single-stepping by setting
RFLAGS.TF = 1.
2) When any instruction in user space completes, a #DB is triggered.
3) The kernel handles the #DB and returns to user space, invoking the
SIGTRAP handler with RFLAGS.TF = 0.
4) After the SIGTRAP handler finishes, the user process performs a
sigreturn syscall, restoring the original state, including
RFLAGS.TF = 1.
5) Goto step 2.
According to the FRED specification:
A) Bit 17 in the augmented SS is designated as the software event
flag, which is set to 1 for FRED event delivery of SYSCALL,
SYSENTER, or INT n.
B) If bit 17 of the augmented SS is 1 and ERETU would result in
RFLAGS.TF = 1, a single-step trap will be pending upon completion
of ERETU.
In step 4) above, the software event flag is set upon the sigreturn
syscall, and its corresponding ERETU would restore RFLAGS.TF = 1.
This combination causes a pending single-step trap upon completion of
ERETU. Therefore, another #DB is triggered before any user space
instruction is executed, which leads to an infinite loop in which the
SIGTRAP handler keeps being invoked on the same user space IP.
Suggested-by: H. Peter Anvin (Intel) <hpa(a)zytor.com>
Signed-off-by: Xin Li (Intel) <xin(a)zytor.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/include/asm/sighandling.h | 20 ++++++++++++++++++++
arch/x86/kernel/signal_32.c | 4 ++++
arch/x86/kernel/signal_64.c | 4 ++++
3 files changed, 28 insertions(+)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..ecb0411fe88c 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,24 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent infinite SIGTRAP handler loop if TF is used without an external
+ * debugger, clear the software event flag in the augmented SS, ensuring no
+ * single-step trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original state
+ * is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+ if (IS_ENABLED(CONFIG_X86_FRED) && cpu_feature_enabled(X86_FEATURE_FRED) &&
+ !(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
base-commit: 6a7c3c2606105a41dde81002c0037420bc1ddf00
--
2.49.0
From: Xin Li <xin(a)zytor.com>
Clear the software event flag in the augmented SS to prevent infinite
SIGTRAP handler loop if TF is used without an external debugger.
Following is a typical single-stepping flow for a user process:
1) The user process is prepared for single-stepping by setting
RFLAGS.TF = 1.
2) When any instruction in user space completes, a #DB is triggered.
3) The kernel handles the #DB and returns to user space, invoking the
SIGTRAP handler with RFLAGS.TF = 0.
4) After the SIGTRAP handler finishes, the user process performs a
sigreturn syscall, restoring the original state, including
RFLAGS.TF = 1.
5) Goto step 2.
According to the FRED specification:
A) Bit 17 in the augmented SS is designated as the software event
flag, which is set to 1 for FRED event delivery of SYSCALL,
SYSENTER, or INT n.
B) If bit 17 of the augmented SS is 1 and ERETU would result in
RFLAGS.TF = 1, a single-step trap will be pending upon completion
of ERETU.
In step 4) above, the software event flag is set upon the sigreturn
syscall, and its corresponding ERETU would restore RFLAGS.TF = 1.
This combination causes a pending single-step trap upon completion of
ERETU. Therefore, another #DB is triggered before any user space
instruction is executed, which leads to an infinite loop in which the
SIGTRAP handler keeps being invoked on the same user space IP.
Suggested-by: H. Peter Anvin (Intel) <hpa(a)zytor.com>
Signed-off-by: Xin Li (Intel) <xin(a)zytor.com>
Cc: stable(a)vger.kernel.org
---
Change in v3:
*) Use "#ifdef CONFIG_X86_FRED" instead of IS_ENABLED(CONFIG_X86_FRED)
(Intel LKP).
Change in v2:
*) Remove the check cpu_feature_enabled(X86_FEATURE_FRED), because
regs->fred_ss.swevent will always be 0 otherwise (H. Peter Anvin).
---
arch/x86/include/asm/sighandling.h | 21 +++++++++++++++++++++
arch/x86/kernel/signal_32.c | 4 ++++
arch/x86/kernel/signal_64.c | 4 ++++
3 files changed, 29 insertions(+)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..530eecc371fc 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,25 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent infinite SIGTRAP handler loop if TF is used without an external
+ * debugger, clear the software event flag in the augmented SS, ensuring no
+ * single-step trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original state
+ * is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+#endif
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
base-commit: 6a7c3c2606105a41dde81002c0037420bc1ddf00
--
2.49.0
在 2025/5/23 06:35, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: allow buffered write to avoid full page read if it's block aligned
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-allow-buffered-write-to-avoid-full-page-read-i.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from all stable branches.
Although this patch mentions a failure in fstests, it acts more like an
optimization for btrfs.
Furthermore it relies quite some patches that may not be in stable kernels.
Without all the dependency, this can lead to data corruption.
Please drop this one from all stable kernels.
Thanks,
Qu
>
>
>
> commit de0860d610aaaee77a8c5c713c41fea584ac83b3
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Wed Oct 30 17:04:02 2024 +1030
>
> btrfs: allow buffered write to avoid full page read if it's block aligned
>
> [ Upstream commit 0d31ca6584f21821c708752d379871b9fce2dc48 ]
>
> [BUG]
> Since the support of block size (sector size) < page size for btrfs,
> test case generic/563 fails with 4K block size and 64K page size:
>
> --- tests/generic/563.out 2024-04-25 18:13:45.178550333 +0930
> +++ /home/adam/xfstests-dev/results//generic/563.out.bad 2024-09-30 09:09:16.155312379 +0930
> @@ -3,7 +3,8 @@
> read is in range
> write is in range
> write -> read/write
> -read is in range
> +read has value of 8388608
> +read is NOT in range -33792 .. 33792
> write is in range
> ...
>
> [CAUSE]
> The test case creates a 8MiB file, then does buffered write into the 8MiB
> using 4K block size, to overwrite the whole file.
>
> On 4K page sized systems, since the write range covers the full block and
> page, btrfs will not bother reading the page, just like what XFS and EXT4
> do.
>
> But on 64K page sized systems, although the 4K sized write is still block
> aligned, it's not page aligned anymore, thus btrfs will read the full
> page, which will be accounted by cgroup and fail the test.
>
> As the test case itself expects such 4K block aligned write should not
> trigger any read.
>
> Such expected behavior is an optimization to reduce folio reads when
> possible, and unfortunately btrfs does not implement such optimization.
>
> [FIX]
> To skip the full page read, we need to do the following modification:
>
> - Do not trigger full page read as long as the buffered write is block
> aligned
> This is pretty simple by modifying the check inside
> prepare_uptodate_page().
>
> - Skip already uptodate blocks during full page read
> Or we can lead to the following data corruption:
>
> 0 32K 64K
> |///////| |
>
> Where the file range [0, 32K) is dirtied by buffered write, the
> remaining range [32K, 64K) is not.
>
> When reading the full page, since [0,32K) is only dirtied but not
> written back, there is no data extent map for it, but a hole covering
> [0, 64k).
>
> If we continue reading the full page range [0, 64K), the dirtied range
> will be filled with 0 (since there is only a hole covering the whole
> range).
> This causes the dirtied range to get lost.
>
> With this optimization, btrfs can pass generic/563 even if the page size
> is larger than fs block size.
>
> Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 06922529f19dc..13b5359ea1b77 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -974,6 +974,10 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
> end_folio_read(folio, true, cur, iosize);
> break;
> }
> + if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
> + end_folio_read(folio, true, cur, blocksize);
> + continue;
> + }
> em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
> if (IS_ERR(em)) {
> end_folio_read(folio, false, cur, end + 1 - cur);
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index cd4e40a719186..61ad1a79e5698 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -804,14 +804,15 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
> {
> u64 clamp_start = max_t(u64, pos, folio_pos(folio));
> u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
> + const u32 blocksize = inode_to_fs_info(inode)->sectorsize;
> int ret = 0;
>
> if (folio_test_uptodate(folio))
> return 0;
>
> if (!force_uptodate &&
> - IS_ALIGNED(clamp_start, PAGE_SIZE) &&
> - IS_ALIGNED(clamp_end, PAGE_SIZE))
> + IS_ALIGNED(clamp_start, blocksize) &&
> + IS_ALIGNED(clamp_end, blocksize))
> return 0;
>
> ret = btrfs_read_folio(NULL, folio);
submit_bio() may be called recursively. To limit the stack depth, recursive
calls result in bios being added to a list (current->bio_list).
__submit_bio_noacct() sets up that list and maintains two lists with
requests:
* bio_list_on_stack[0] is the list with bios submitted by recursive
submit_bio() calls from inside the latest __submit_bio() call.
* bio_list_on_stack[1] is the list with bios submitted by recursive
submit_bio() calls from inside previous __submit_bio() calls.
Make sure that bios are submitted to lower devices in the order these
have been submitted by submit_bio() by adding new bios at the end of the
list instead of at the front.
This patch fixes unaligned write errors that I encountered with F2FS
submitting zoned writes to a dm driver stacked on top of a zoned UFS
device.
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Damien Le Moal <dlemoal(a)kernel.org>
Cc: Yu Kuai <yukuai1(a)huaweicloud.com>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
block/blk-core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index b862c66018f2..4b728fa1c138 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -704,9 +704,9 @@ static void __submit_bio_noacct(struct bio *bio)
/*
* Now assemble so we handle the lowest level first.
*/
+ bio_list_on_stack[0] = bio_list_on_stack[1];
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
- bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
current->bio_list = NULL;
On 5/22/25 4:10 PM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> f2fs: defer readonly check vs norecovery
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> f2fs-defer-readonly-check-vs-norecovery.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I already replied to the AUTOSEL email on 5/5 saying that this
is not a bug fix and should not be in the stable tree, but here we are.
> commit 442e4090bb78d5dce4506a591214ce2447d6ea50
> Author: Eric Sandeen <sandeen(a)redhat.com>
> Date: Mon Mar 3 11:12:17 2025 -0600
>
> f2fs: defer readonly check vs norecovery
>
> [ Upstream commit 9cca49875997a1a7e92800a828a62bacb0f577b9 ]
>
> Defer the readonly-vs-norecovery check until after option parsing is done
> so that option parsing does not require an active superblock for the test.
> Add a helpful message, while we're at it.
>
> (I think could be moved back into parsing after we switch to the new mount
> API if desired, as the fs context will have RO state available.)
>
> Signed-off-by: Eric Sandeen <sandeen(a)redhat.com>
> Reviewed-by: Chao Yu <chao(a)kernel.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index b8a0e925a4011..d3b04a589b525 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -728,10 +728,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> set_opt(sbi, DISABLE_ROLL_FORWARD);
> break;
> case Opt_norecovery:
> - /* this option mounts f2fs with ro */
> + /* requires ro mount, checked in f2fs_default_check */
> set_opt(sbi, NORECOVERY);
> - if (!f2fs_readonly(sb))
> - return -EINVAL;
> break;
> case Opt_discard:
> if (!f2fs_hw_support_discard(sbi)) {
> @@ -1418,6 +1416,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> f2fs_err(sbi, "Allow to mount readonly mode only");
> return -EROFS;
> }
> +
> + if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) {
> + f2fs_err(sbi, "norecovery requires readonly mount");
> + return -EINVAL;
> + }
> +
> return 0;
> }
>
>
> [...]
>
> Additionally, the only difference between fixing the issue and before is
> that there is no return error handling of make_request(). But after
> previous patch cleaned md_write_start(), make_requst() only return error
> in raid5_make_request() by dm-raid, see commit 41425f96d7aa ("dm-raid456,
> md/raid456: fix a deadlock for dm-raid456 while io concurrent with
> reshape)". Since dm always splits data and flush operation into two
> separate io, io size of flush submitted by dm always is 0, make_request()
> will not be called in md_submit_flush_data(). To prevent future
> modifications from introducing issues, add WARN_ON to ensure
> make_request() no error is returned in this context.
>
> [...]
> @@ -560,8 +552,20 @@ static void md_submit_flush_data(struct work_struct *ws)
> bio_endio(bio);
> } else {
> bio->bi_opf &= ~REQ_PREFLUSH;
> - md_handle_request(mddev, bio);
> +
> + /*
> + * make_requst() will never return error here, it only
> + * returns error in raid5_make_request() by dm-raid.
> + * Since dm always splits data and flush operation into
> + * two separate io, io size of flush submitted by dm
> + * always is 0, make_request() will not be called here.
> + */
> + if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
> + bio_io_error(bio);;
> }
Hello,
It looks we can hit this WARN_ON_ONCE() after which rootfs is
switching to read-only:
May 20 15:13:35 hostname kernel: WARNING: CPU: 35 PID: 1517323 at drivers/md/md.c:621 md_submit_flush_data+0x9b/0xe0
...
May 20 15:13:35 hostname kernel: XFS (md125): log I/O error -5
May 20 15:13:35 hostname kernel: XFS (md125): Filesystem has been shut down due to log error (0x2).
May 20 15:13:35 hostname kernel: XFS (md125): Please unmount the filesystem and rectify the problem(s).
Can you double check if the following regression is actual?
Since both stable/linux-6.1.y and stable/linux-6.6.y branches don't
have b75197e86e6d ("md: Remove flush handling") there is a minor issue
with this backport.
Statement "previous patch cleaned md_write_start(), make_requst() only
return error in raid5_make_request() by dm-raid" will not work for
both branches since 03e792eaf18e ("md: change the return value type of
md_write_start to void") was not backported.
So we should either backport it, or do error handling, not the
WARN_ON_ONCE().
--
Andrew Kanner
在 2025/5/23 07:31, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: prevent inline data extents read from touching blocks beyond its range
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-prevent-inline-data-extents-read-from-touching.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this one from all stable trees.
Although the patch won't cause any behavior change, the main reason for
this patch is to prepare for the subpage optimization (and future large
folios support).
Thanks,
Qu
>
>
>
> commit 98504dd74a2688ff63dba6bf1d9f8abc7f0b322e
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Fri Nov 15 19:15:34 2024 +1030
>
> btrfs: prevent inline data extents read from touching blocks beyond its range
>
> [ Upstream commit 1a5b5668d711d3d1ef447446beab920826decec3 ]
>
> Currently reading an inline data extent will zero out the remaining
> range in the page.
>
> This is not yet causing problems even for block size < page size
> (subpage) cases because:
>
> 1) An inline data extent always starts at file offset 0
> Meaning at page read, we always read the inline extent first, before
> any other blocks in the page. Then later blocks are properly read out
> and re-fill the zeroed out ranges.
>
> 2) Currently btrfs will read out the whole page if a buffered write is
> not page aligned
> So a page is either fully uptodate at buffered write time (covers the
> whole page), or we will read out the whole page first.
> Meaning there is nothing to lose for such an inline extent read.
>
> But it's still not ideal:
>
> - We're zeroing out the page twice
> Once done by read_inline_extent()/uncompress_inline(), once done by
> btrfs_do_readpage() for ranges beyond i_size.
>
> - We're touching blocks that don't belong to the inline extent
> In the incoming patches, we can have a partial uptodate folio, of
> which some dirty blocks can exist while the page is not fully uptodate:
>
> The page size is 16K and block size is 4K:
>
> 0 4K 8K 12K 16K
> | | |/////////| |
>
> And range [8K, 12K) is dirtied by a buffered write, the remaining
> blocks are not uptodate.
>
> If range [0, 4K) contains an inline data extent, and we try to read
> the whole page, the current behavior will overwrite range [8K, 12K)
> with zero and cause data loss.
>
> So to make the behavior more consistent and in preparation for future
> changes, limit the inline data extents read to only zero out the range
> inside the first block, not the whole page.
>
> Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 0da2611fb9c85..ee8c18d298758 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -6825,6 +6825,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
> {
> int ret;
> struct extent_buffer *leaf = path->nodes[0];
> + const u32 blocksize = leaf->fs_info->sectorsize;
> char *tmp;
> size_t max_size;
> unsigned long inline_size;
> @@ -6841,7 +6842,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
>
> read_extent_buffer(leaf, tmp, ptr, inline_size);
>
> - max_size = min_t(unsigned long, PAGE_SIZE, max_size);
> + max_size = min_t(unsigned long, blocksize, max_size);
> ret = btrfs_decompress(compress_type, tmp, folio, 0, inline_size,
> max_size);
>
> @@ -6853,8 +6854,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
> * cover that region here.
> */
>
> - if (max_size < PAGE_SIZE)
> - folio_zero_range(folio, max_size, PAGE_SIZE - max_size);
> + if (max_size < blocksize)
> + folio_zero_range(folio, max_size, blocksize - max_size);
> kfree(tmp);
> return ret;
> }
> @@ -6862,6 +6863,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
> static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path,
> struct folio *folio)
> {
> + const u32 blocksize = path->nodes[0]->fs_info->sectorsize;
> struct btrfs_file_extent_item *fi;
> void *kaddr;
> size_t copy_size;
> @@ -6876,14 +6878,14 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
> if (btrfs_file_extent_compression(path->nodes[0], fi) != BTRFS_COMPRESS_NONE)
> return uncompress_inline(path, folio, fi);
>
> - copy_size = min_t(u64, PAGE_SIZE,
> + copy_size = min_t(u64, blocksize,
> btrfs_file_extent_ram_bytes(path->nodes[0], fi));
> kaddr = kmap_local_folio(folio, 0);
> read_extent_buffer(path->nodes[0], kaddr,
> btrfs_file_extent_inline_start(fi), copy_size);
> kunmap_local(kaddr);
> - if (copy_size < PAGE_SIZE)
> - folio_zero_range(folio, copy_size, PAGE_SIZE - copy_size);
> + if (copy_size < blocksize)
> + folio_zero_range(folio, copy_size, blocksize - copy_size);
> return 0;
> }
>
The patch titled
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
Date: Thu, 22 May 2025 11:22:17 +0800
A kernel crash was observed when replacing free hugetlb folios:
BUG: kernel NULL pointer dereference, address: 0000000000000028
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary)
RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0
RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000
RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000
RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000
R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000
R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004
FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0
Call Trace:
<TASK>
replace_free_hugepage_folios+0xb6/0x100
alloc_contig_range_noprof+0x18a/0x590
? srso_return_thunk+0x5/0x5f
? down_read+0x12/0xa0
? srso_return_thunk+0x5/0x5f
cma_range_alloc.constprop.0+0x131/0x290
__cma_alloc+0xcf/0x2c0
cma_alloc_write+0x43/0xb0
simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110
debugfs_attr_write+0x46/0x70
full_proxy_write+0x62/0xa0
vfs_write+0xf8/0x420
? srso_return_thunk+0x5/0x5f
? filp_flush+0x86/0xa0
? srso_return_thunk+0x5/0x5f
? filp_close+0x1f/0x30
? srso_return_thunk+0x5/0x5f
? do_dup2+0xaf/0x160
? srso_return_thunk+0x5/0x5f
ksys_write+0x65/0xe0
do_syscall_64+0x64/0x170
entry_SYSCALL_64_after_hwframe+0x76/0x7e
There is a potential race between __update_and_free_hugetlb_folio() and
replace_free_hugepage_folios():
CPU1 CPU2
__update_and_free_hugetlb_folio replace_free_hugepage_folios
folio_test_hugetlb(folio)
-- It's still hugetlb folio.
__folio_clear_hugetlb(folio)
hugetlb_free_folio(folio)
h = folio_hstate(folio)
-- Here, h is NULL pointer
When the above race condition occurs, folio_hstate(folio) returns NULL,
and subsequent access to this NULL pointer will cause the system to crash.
To resolve this issue, execute folio_hstate(folio) under the protection
of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not
return NULL.
Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.…
Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/mm/hugetlb.c~mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios
+++ a/mm/hugetlb.c
@@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigne
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
+
+ /*
+ * The folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
+ spin_lock_irq(&hugetlb_lock);
if (folio_test_hugetlb(folio)) {
h = folio_hstate(folio);
} else {
+ spin_unlock_irq(&hugetlb_lock);
start_pfn++;
continue;
}
+ spin_unlock_irq(&hugetlb_lock);
if (!folio_ref_count(folio)) {
ret = alloc_and_dissolve_hugetlb_folio(h, folio,
_
Patches currently in -mm which might be from yangge1116(a)126.com are
mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch
The patch titled
Subject: mm: swap: fix potensial buffer overflow in setup_clusters()
has been added to the -mm mm-new branch. Its filename is
mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
Subject: mm: swap: fix potensial buffer overflow in setup_clusters()
Date: Thu, 22 May 2025 20:25:53 +0800
In setup_swap_map(), we only ensure badpages are in range (0, last_page].
As maxpages might be < last_page, setup_clusters() will encounter a buffer
overflow when a badpage is >= maxpages.
Only call inc_cluster_info_page() for badpage which is < maxpages to fix
the issue.
Link: https://lkml.kernel.org/r/20250522122554.12209-4-shikemeng@huaweicloud.com
Fixes: b843786b0bd01 ("mm: swapfile: fix SSD detection with swapfile on btrfs")
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: <stable(a)vger.kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Kairui Song <kasong(a)tencent.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/swapfile.c~mm-swap-fix-potensial-buffer-overflow-in-setup_clusters
+++ a/mm/swapfile.c
@@ -3208,9 +3208,13 @@ static struct swap_cluster_info *setup_c
* and the EOF part of the last cluster.
*/
inc_cluster_info_page(si, cluster_info, 0);
- for (i = 0; i < swap_header->info.nr_badpages; i++)
- inc_cluster_info_page(si, cluster_info,
- swap_header->info.badpages[i]);
+ for (i = 0; i < swap_header->info.nr_badpages; i++) {
+ unsigned int page_nr = swap_header->info.badpages[i];
+
+ if (page_nr >= maxpages)
+ continue;
+ inc_cluster_info_page(si, cluster_info, page_nr);
+ }
for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
inc_cluster_info_page(si, cluster_info, i);
_
Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are
mm-shmem-avoid-unpaired-folio_unlock-in-shmem_swapin_folio.patch
mm-shmem-add-missing-shmem_unacct_size-in-__shmem_file_setup.patch
mm-shmem-fix-potential-dead-loop-in-shmem_unuse.patch
mm-shmem-only-remove-inode-from-swaplist-when-its-swapped-page-count-is-0.patch
mm-shmem-remove-unneeded-xa_is_value-check-in-shmem_unuse_swap_entries.patch
mm-swap-move-nr_swap_pages-counter-decrement-from-folio_alloc_swap-to-swap_range_alloc.patch
mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch
mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch
mm-swap-remove-stale-comment-stale-comment-in-cluster_alloc_swap_entry.patch
The patch titled
Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potensial deadloop
has been added to the -mm mm-new branch. Its filename is
mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potensial deadloop
Date: Thu, 22 May 2025 20:25:52 +0800
We use maxpages from read_swap_header() to initialize swap_info_struct,
however the maxpages might be reduced in setup_swap_extents() and the
si->max is assigned with the reduced maxpages from the
setup_swap_extents().
Obviously, this could lead to memory waste as we allocated memory based on
larger maxpages, besides, this could lead to a potensial deadloop as
following:
1) When calling setup_clusters() with larger maxpages, unavailable
pages within range [si->max, larger maxpages) are not accounted with
inc_cluster_info_page(). As a result, these pages are assumed
available but can not be allocated. The cluster contains these pages
can be moved to frag_clusters list after it's all available pages were
allocated.
2) When the cluster mentioned in 1) is the only cluster in
frag_clusters list, cluster_alloc_swap_entry() assume order 0
allocation will never failed and will enter a deadloop by keep trying
to allocate page from the only cluster in frag_clusters which contains
no actually available page.
Call setup_swap_extents() to get the final maxpages before
swap_info_struct initialization to fix the issue.
Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com
Fixes: 661383c6111a3 ("mm: swap: relaim the cached parts that got scanned")
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: <stable(a)vger.kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Kairui Song <kasong(a)tencent.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 47 ++++++++++++++++++++---------------------------
1 file changed, 20 insertions(+), 27 deletions(-)
--- a/mm/swapfile.c~mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop
+++ a/mm/swapfile.c
@@ -3141,43 +3141,30 @@ static unsigned long read_swap_header(st
return maxpages;
}
-static int setup_swap_map_and_extents(struct swap_info_struct *si,
- union swap_header *swap_header,
- unsigned char *swap_map,
- unsigned long maxpages,
- sector_t *span)
+static int setup_swap_map(struct swap_info_struct *si,
+ union swap_header *swap_header,
+ unsigned char *swap_map,
+ unsigned long maxpages)
{
- unsigned int nr_good_pages;
unsigned long i;
- int nr_extents;
-
- nr_good_pages = maxpages - 1; /* omit header page */
+ swap_map[0] = SWAP_MAP_BAD; /* omit header page */
for (i = 0; i < swap_header->info.nr_badpages; i++) {
unsigned int page_nr = swap_header->info.badpages[i];
if (page_nr == 0 || page_nr > swap_header->info.last_page)
return -EINVAL;
if (page_nr < maxpages) {
swap_map[page_nr] = SWAP_MAP_BAD;
- nr_good_pages--;
+ si->pages--;
}
}
- if (nr_good_pages) {
- swap_map[0] = SWAP_MAP_BAD;
- si->max = maxpages;
- si->pages = nr_good_pages;
- nr_extents = setup_swap_extents(si, span);
- if (nr_extents < 0)
- return nr_extents;
- nr_good_pages = si->pages;
- }
- if (!nr_good_pages) {
+ if (!si->pages) {
pr_warn("Empty swap-file\n");
return -EINVAL;
}
- return nr_extents;
+ return 0;
}
#define SWAP_CLUSTER_INFO_COLS \
@@ -3217,7 +3204,7 @@ static struct swap_cluster_info *setup_c
* Mark unusable pages as unavailable. The clusters aren't
* marked free yet, so no list operations are involved yet.
*
- * See setup_swap_map_and_extents(): header page, bad pages,
+ * See setup_swap_map(): header page, bad pages,
* and the EOF part of the last cluster.
*/
inc_cluster_info_page(si, cluster_info, 0);
@@ -3354,6 +3341,15 @@ SYSCALL_DEFINE2(swapon, const char __use
goto bad_swap_unlock_inode;
}
+ si->max = maxpages;
+ si->pages = maxpages - 1;
+ nr_extents = setup_swap_extents(si, &span);
+ if (nr_extents < 0) {
+ error = nr_extents;
+ goto bad_swap_unlock_inode;
+ }
+ maxpages = si->max;
+
/* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages);
if (!swap_map) {
@@ -3365,12 +3361,9 @@ SYSCALL_DEFINE2(swapon, const char __use
if (error)
goto bad_swap_unlock_inode;
- nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,
- maxpages, &span);
- if (unlikely(nr_extents < 0)) {
- error = nr_extents;
+ error = setup_swap_map(si, swap_header, swap_map, maxpages);
+ if (error)
goto bad_swap_unlock_inode;
- }
/*
* Use kvmalloc_array instead of bitmap_zalloc as the allocation order might
_
Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are
mm-shmem-avoid-unpaired-folio_unlock-in-shmem_swapin_folio.patch
mm-shmem-add-missing-shmem_unacct_size-in-__shmem_file_setup.patch
mm-shmem-fix-potential-dead-loop-in-shmem_unuse.patch
mm-shmem-only-remove-inode-from-swaplist-when-its-swapped-page-count-is-0.patch
mm-shmem-remove-unneeded-xa_is_value-check-in-shmem_unuse_swap_entries.patch
mm-swap-move-nr_swap_pages-counter-decrement-from-folio_alloc_swap-to-swap_range_alloc.patch
mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch
mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch
mm-swap-remove-stale-comment-stale-comment-in-cluster_alloc_swap_entry.patch
The patch titled
Subject: mm: swap: move nr_swap_pages counter decrement from folio_alloc_swap() to swap_range_alloc()
has been added to the -mm mm-new branch. Its filename is
mm-swap-move-nr_swap_pages-counter-decrement-from-folio_alloc_swap-to-swap_range_alloc.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
Subject: mm: swap: move nr_swap_pages counter decrement from folio_alloc_swap() to swap_range_alloc()
Date: Thu, 22 May 2025 20:25:51 +0800
Patch series "Some randome fixes and cleanups to swapfile".
Patch 0-3 are some random fixes. Patch 4 is a cleanup. More details can
be found in respective patches.
This patch (of 4):
When folio_alloc_swap() encounters a failure in either
mem_cgroup_try_charge_swap() or add_to_swap_cache(), nr_swap_pages counter
is not decremented for allocated entry. However, the following
put_swap_folio() will increase nr_swap_pages counter unpairly and lead to
an imbalance.
Move nr_swap_pages decrement from folio_alloc_swap() to swap_range_alloc()
to pair the nr_swap_pages counting.
Link: https://lkml.kernel.org/r/20250522122554.12209-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20250522122554.12209-2-shikemeng@huaweicloud.com
Fixes: 0ff67f990bd45 ("mm, swap: remove swap slot cache")
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Kairui Song <kasong(a)tencent.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-swap-move-nr_swap_pages-counter-decrement-from-folio_alloc_swap-to-swap_range_alloc
+++ a/mm/swapfile.c
@@ -1115,6 +1115,7 @@ static void swap_range_alloc(struct swap
if (vm_swap_full())
schedule_work(&si->reclaim_work);
}
+ atomic_long_sub(nr_entries, &nr_swap_pages);
}
static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
@@ -1313,7 +1314,6 @@ int folio_alloc_swap(struct folio *folio
if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL))
goto out_free;
- atomic_long_sub(size, &nr_swap_pages);
return 0;
out_free:
_
Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are
mm-shmem-avoid-unpaired-folio_unlock-in-shmem_swapin_folio.patch
mm-shmem-add-missing-shmem_unacct_size-in-__shmem_file_setup.patch
mm-shmem-fix-potential-dead-loop-in-shmem_unuse.patch
mm-shmem-only-remove-inode-from-swaplist-when-its-swapped-page-count-is-0.patch
mm-shmem-remove-unneeded-xa_is_value-check-in-shmem_unuse_swap_entries.patch
mm-swap-move-nr_swap_pages-counter-decrement-from-folio_alloc_swap-to-swap_range_alloc.patch
mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch
mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch
mm-swap-remove-stale-comment-stale-comment-in-cluster_alloc_swap_entry.patch
在 2025/5/23 06:35, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: prevent inline data extents read from touching blocks beyond its range
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-prevent-inline-data-extents-read-from-touching.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch.
This is again a preparation patch for larger folios support of btrfs,
and the optimization to dirty a block without reading the full page.
This patch alone doesn't cause any difference for older kernels and
should not be backported.
Thanks,
Qu
>
>
>
> commit 6a2d904623a8d1711b6b5065845d52cb3f2be60a
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Fri Nov 15 19:15:34 2024 +1030
>
> btrfs: prevent inline data extents read from touching blocks beyond its range
>
> [ Upstream commit 1a5b5668d711d3d1ef447446beab920826decec3 ]
>
> Currently reading an inline data extent will zero out the remaining
> range in the page.
>
> This is not yet causing problems even for block size < page size
> (subpage) cases because:
>
> 1) An inline data extent always starts at file offset 0
> Meaning at page read, we always read the inline extent first, before
> any other blocks in the page. Then later blocks are properly read out
> and re-fill the zeroed out ranges.
>
> 2) Currently btrfs will read out the whole page if a buffered write is
> not page aligned
> So a page is either fully uptodate at buffered write time (covers the
> whole page), or we will read out the whole page first.
> Meaning there is nothing to lose for such an inline extent read.
>
> But it's still not ideal:
>
> - We're zeroing out the page twice
> Once done by read_inline_extent()/uncompress_inline(), once done by
> btrfs_do_readpage() for ranges beyond i_size.
>
> - We're touching blocks that don't belong to the inline extent
> In the incoming patches, we can have a partial uptodate folio, of
> which some dirty blocks can exist while the page is not fully uptodate:
>
> The page size is 16K and block size is 4K:
>
> 0 4K 8K 12K 16K
> | | |/////////| |
>
> And range [8K, 12K) is dirtied by a buffered write, the remaining
> blocks are not uptodate.
>
> If range [0, 4K) contains an inline data extent, and we try to read
> the whole page, the current behavior will overwrite range [8K, 12K)
> with zero and cause data loss.
>
> So to make the behavior more consistent and in preparation for future
> changes, limit the inline data extents read to only zero out the range
> inside the first block, not the whole page.
>
> Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 9a648fb130230..a7136311a13c6 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -6779,6 +6779,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
> {
> int ret;
> struct extent_buffer *leaf = path->nodes[0];
> + const u32 blocksize = leaf->fs_info->sectorsize;
> char *tmp;
> size_t max_size;
> unsigned long inline_size;
> @@ -6795,7 +6796,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
>
> read_extent_buffer(leaf, tmp, ptr, inline_size);
>
> - max_size = min_t(unsigned long, PAGE_SIZE, max_size);
> + max_size = min_t(unsigned long, blocksize, max_size);
> ret = btrfs_decompress(compress_type, tmp, folio, 0, inline_size,
> max_size);
>
> @@ -6807,14 +6808,15 @@ static noinline int uncompress_inline(struct btrfs_path *path,
> * cover that region here.
> */
>
> - if (max_size < PAGE_SIZE)
> - folio_zero_range(folio, max_size, PAGE_SIZE - max_size);
> + if (max_size < blocksize)
> + folio_zero_range(folio, max_size, blocksize - max_size);
> kfree(tmp);
> return ret;
> }
>
> static int read_inline_extent(struct btrfs_path *path, struct folio *folio)
> {
> + const u32 blocksize = path->nodes[0]->fs_info->sectorsize;
> struct btrfs_file_extent_item *fi;
> void *kaddr;
> size_t copy_size;
> @@ -6829,14 +6831,14 @@ static int read_inline_extent(struct btrfs_path *path, struct folio *folio)
> if (btrfs_file_extent_compression(path->nodes[0], fi) != BTRFS_COMPRESS_NONE)
> return uncompress_inline(path, folio, fi);
>
> - copy_size = min_t(u64, PAGE_SIZE,
> + copy_size = min_t(u64, blocksize,
> btrfs_file_extent_ram_bytes(path->nodes[0], fi));
> kaddr = kmap_local_folio(folio, 0);
> read_extent_buffer(path->nodes[0], kaddr,
> btrfs_file_extent_inline_start(fi), copy_size);
> kunmap_local(kaddr);
> - if (copy_size < PAGE_SIZE)
> - folio_zero_range(folio, copy_size, PAGE_SIZE - copy_size);
> + if (copy_size < blocksize)
> + folio_zero_range(folio, copy_size, blocksize - copy_size);
> return 0;
> }
>
在 2025/5/23 06:35, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: properly limit inline data extent according to block size
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-properly-limit-inline-data-extent-according-to.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch.
This is mostly for the incoming large folios support for btrfs.
For older kernels this patch will not cause any behavior change.
Thanks,
Qu>
>
>
> commit ec02842137bdccb74ed331a1b0a335ee22eb179c
> Author: Qu Wenruo <wqu(a)suse.com>
> Date: Tue Feb 25 14:30:44 2025 +1030
>
> btrfs: properly limit inline data extent according to block size
>
> [ Upstream commit 23019d3e6617a8ec99a8d2f5947aa3dd8a74a1b8 ]
>
> Btrfs utilizes inline data extent for the following cases:
>
> - Regular small files
> - Symlinks
>
> And "btrfs check" detects any file extents that are too large as an
> error.
>
> It's not a problem for 4K block size, but for the incoming smaller
> block sizes (2K), it can cause problems due to bad limits:
>
> - Non-compressed inline data extents
> We do not allow a non-compressed inline data extent to be as large as
> block size.
>
> - Symlinks
> Currently the only real limit on symlinks are 4K, which can be larger
> than 2K block size.
>
> These will result btrfs-check to report too large file extents.
>
> Fix it by adding proper size checks for the above cases.
>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> Reviewed-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: David Sterba <dsterba(a)suse.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a06fca7934d55..9a648fb130230 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -583,6 +583,10 @@ static bool can_cow_file_range_inline(struct btrfs_inode *inode,
> if (size > fs_info->sectorsize)
> return false;
>
> + /* We do not allow a non-compressed extent to be as large as block size. */
> + if (data_len >= fs_info->sectorsize)
> + return false;
> +
> /* We cannot exceed the maximum inline data size. */
> if (data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> return false;
> @@ -8671,7 +8675,12 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
> struct extent_buffer *leaf;
>
> name_len = strlen(symname);
> - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
> + /*
> + * Symlinks utilize uncompressed inline extent data, which should not
> + * reach block size.
> + */
> + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
> + name_len >= fs_info->sectorsize)
> return -ENAMETOOLONG;
>
> inode = new_inode(dir->i_sb);
This reverts commit 6ccb83d6c4972ebe6ae49de5eba051de3638362c.
Commit 6ccb83d6c497 ("usb: xhci: Implement xhci_handshake_check_state()
helper") was introduced to workaround watchdog timeout issues on some
platforms, allowing xhci_reset() to bail out early without waiting
for the reset to complete.
Skipping the xhci handshake during a reset is a dangerous move. The
xhci specification explicitly states that certain registers cannot
be accessed during reset in section 5.4.1 USB Command Register (USBCMD),
Host Controller Reset (HCRST) field:
"This bit is cleared to '0' by the Host Controller when the reset
process is complete. Software cannot terminate the reset process
early by writinga '0' to this bit and shall not write any xHC
Operational or Runtime registers until while HCRST is '1'."
This behavior causes a regression on SNPS DWC3 USB controller with
dual-role capability. When the DWC3 controller exits host mode and
removes xhci while a reset is still in progress, and then tries to
configure its hardware for device mode, the ongoing reset leads to
register access issues; specifically, all register reads returns 0.
These issues extend beyond the xhci register space (which is expected
during a reset) and affect the entire DWC3 IP block, causing the DWC3
device mode to malfunction.
Cc: stable(a)vger.kernel.org
Fixes: 6ccb83d6c497 ("usb: xhci: Implement xhci_handshake_check_state() helper")
Signed-off-by: Roy Luo <royluo(a)google.com>
---
Changes in v1:
- Link to previous patchset: https://lore.kernel.org/r/20250515185227.1507363-1-royluo@google.com/
---
drivers/usb/host/xhci-ring.c | 5 ++---
drivers/usb/host/xhci.c | 26 +-------------------------
drivers/usb/host/xhci.h | 2 --
3 files changed, 3 insertions(+), 30 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 423bf3649570..b720e04ce7d8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
* In the future we should distinguish between -ENODEV and -ETIMEDOUT
* and try to recover a -ETIMEDOUT with a host controller reset.
*/
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring,
- CMD_RING_RUNNING, 0, 5 * 1000 * 1000,
- XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->cmd_ring,
+ CMD_RING_RUNNING, 0, 5 * 1000 * 1000);
if (ret < 0) {
xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret);
xhci_halt(xhci);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 90eb491267b5..472c4b6ae59e 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -83,29 +83,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us)
return ret;
}
-/*
- * xhci_handshake_check_state - same as xhci_handshake but takes an additional
- * exit_state parameter, and bails out with an error immediately when xhc_state
- * has exit_state flag set.
- */
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state)
-{
- u32 result;
- int ret;
-
- ret = readl_poll_timeout_atomic(ptr, result,
- (result & mask) == done ||
- result == U32_MAX ||
- xhci->xhc_state & exit_state,
- 1, usec);
-
- if (result == U32_MAX || xhci->xhc_state & exit_state)
- return -ENODEV;
-
- return ret;
-}
-
/*
* Disable interrupts and begin the xHCI halting process.
*/
@@ -226,8 +203,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
if (xhci->quirks & XHCI_INTEL_HOST)
udelay(1000);
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command,
- CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us);
if (ret)
return ret;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 242ab9fbc8ae..5e698561b96d 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1855,8 +1855,6 @@ void xhci_remove_secondary_interrupter(struct usb_hcd
/* xHCI host controller glue */
typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *);
int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us);
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state);
void xhci_quiesce(struct xhci_hcd *xhci);
int xhci_halt(struct xhci_hcd *xhci);
int xhci_start(struct xhci_hcd *xhci);
base-commit: 172a9d94339cea832d89630b89d314e41d622bd8
--
2.49.0.1112.g889b7c5bd8-goog
This reverts commit 6ccb83d6c4972ebe6ae49de5eba051de3638362c.
Commit 6ccb83d6c497 ("usb: xhci: Implement xhci_handshake_check_state()
helper") was introduced to workaround watchdog timeout issues on some
platforms, allowing xhci_reset() to bail out early without waiting
for the reset to complete.
Skipping the xhci handshake during a reset is a dangerous move. The
xhci specification explicitly states that certain registers cannot
be accessed during reset in section 5.4.1 USB Command Register (USBCMD),
Host Controller Reset (HCRST) field:
"This bit is cleared to '0' by the Host Controller when the reset
process is complete. Software cannot terminate the reset process
early by writinga '0' to this bit and shall not write any xHC
Operational or Runtime registers until while HCRST is '1'."
This behavior causes a regression on SNPS DWC3 USB controller with
dual-role capability. When the DWC3 controller exits host mode and
removes xhci while a reset is still in progress, and then tries to
configure its hardware for device mode, the ongoing reset leads to
register access issues; specifically, all register reads returns 0.
These issues extend beyond the xhci register space (which is expected
during a reset) and affect the entire DWC3 IP block, causing the DWC3
device mode to malfunction.
Cc: stable(a)vger.kernel.org
Fixes: 6ccb83d6c497 ("usb: xhci: Implement xhci_handshake_check_state() helper")
Signed-off-by: Roy Luo <royluo(a)google.com>
---
drivers/usb/host/xhci-ring.c | 5 ++---
drivers/usb/host/xhci.c | 26 +-------------------------
drivers/usb/host/xhci.h | 2 --
3 files changed, 3 insertions(+), 30 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 423bf3649570..b720e04ce7d8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
* In the future we should distinguish between -ENODEV and -ETIMEDOUT
* and try to recover a -ETIMEDOUT with a host controller reset.
*/
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring,
- CMD_RING_RUNNING, 0, 5 * 1000 * 1000,
- XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->cmd_ring,
+ CMD_RING_RUNNING, 0, 5 * 1000 * 1000);
if (ret < 0) {
xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret);
xhci_halt(xhci);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 244b12eafd95..cb9f35acb1f9 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -83,29 +83,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us)
return ret;
}
-/*
- * xhci_handshake_check_state - same as xhci_handshake but takes an additional
- * exit_state parameter, and bails out with an error immediately when xhc_state
- * has exit_state flag set.
- */
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state)
-{
- u32 result;
- int ret;
-
- ret = readl_poll_timeout_atomic(ptr, result,
- (result & mask) == done ||
- result == U32_MAX ||
- xhci->xhc_state & exit_state,
- 1, usec);
-
- if (result == U32_MAX || xhci->xhc_state & exit_state)
- return -ENODEV;
-
- return ret;
-}
-
/*
* Disable interrupts and begin the xHCI halting process.
*/
@@ -226,8 +203,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
if (xhci->quirks & XHCI_INTEL_HOST)
udelay(1000);
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command,
- CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us);
if (ret)
return ret;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 242ab9fbc8ae..5e698561b96d 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1855,8 +1855,6 @@ void xhci_remove_secondary_interrupter(struct usb_hcd
/* xHCI host controller glue */
typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *);
int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us);
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state);
void xhci_quiesce(struct xhci_hcd *xhci);
int xhci_halt(struct xhci_hcd *xhci);
int xhci_start(struct xhci_hcd *xhci);
--
2.49.0.1204.g71687c7c1d-goog
In mt8195_scp_c1_irq_handler(), only the IPC interrupt bit
(MT8192_SCP_IPC_INT_BIT) was checked., but does not handle
when this bit is not set. This could lead to unhandled watchdog
events. This could lead to unhandled watchdog events. A proper
implementation can be found in mt8183_scp_irq_handler().
Add a new branch to handle SCP watchdog events when the IPC
interrupt bit is not set.
Fixes: 6a1c9aaf04eb ("remoteproc: mediatek: Add MT8195 SCP core 1 operations")
Cc: stable(a)vger.kernel.org # v6.7
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/remoteproc/mtk_scp.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c
index 0f4a7065d0bd..316e8c98a503 100644
--- a/drivers/remoteproc/mtk_scp.c
+++ b/drivers/remoteproc/mtk_scp.c
@@ -273,6 +273,8 @@ static void mt8195_scp_c1_irq_handler(struct mtk_scp *scp)
if (scp_to_host & MT8192_SCP_IPC_INT_BIT)
scp_ipi_handler(scp);
+ else
+ scp_wdt_handler(scp, scp_to_host);
writel(scp_to_host, scp->cluster->reg_base + MT8195_SSHUB2APMCU_IPC_CLR);
}
--
2.42.0.windows.2
Getting / Setting the frame interval using the V4L2 subdev pad ops
get_frame_interval/set_frame_interval causes a deadlock, as the
subdev state is locked in the [1] but also in the driver itself.
In [2] it's described that the caller is responsible to acquire and
release the lock in this case. Therefore, acquiring the lock in the
driver is wrong.
Remove the lock acquisitions/releases from mt9m114_ifp_get_frame_interval()
and mt9m114_ifp_set_frame_interval().
[1] drivers/media/v4l2-core/v4l2-subdev.c - line 1129
[2] Documentation/driver-api/media/v4l2-subdev.rst
Fixes: 24d756e914fc ("media: i2c: Add driver for onsemi MT9M114 camera sensor")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathis Foerst <mathis.foerst(a)mt.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
---
drivers/media/i2c/mt9m114.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/media/i2c/mt9m114.c b/drivers/media/i2c/mt9m114.c
index e909c1227e51..9ff46c72dbc1 100644
--- a/drivers/media/i2c/mt9m114.c
+++ b/drivers/media/i2c/mt9m114.c
@@ -1652,13 +1652,9 @@ static int mt9m114_ifp_get_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
ival->numerator = 1;
ival->denominator = sensor->ifp.frame_rate;
- mutex_unlock(sensor->ifp.hdl.lock);
-
return 0;
}
@@ -1677,8 +1673,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
if (ival->numerator != 0 && ival->denominator != 0)
sensor->ifp.frame_rate = min_t(unsigned int,
ival->denominator / ival->numerator,
@@ -1692,8 +1686,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (sensor->streaming)
ret = mt9m114_set_frame_rate(sensor);
- mutex_unlock(sensor->ifp.hdl.lock);
-
return ret;
}
--
2.34.1
Getting / Setting the frame interval using the V4L2 subdev pad ops
get_frame_interval/set_frame_interval causes a deadlock, as the
subdev state is locked in the [1] but also in the driver itself.
In [2] it's described that the caller is responsible to acquire and
release the lock in this case. Therefore, acquiring the lock in the
driver is wrong.
Remove the lock acquisitions/releases from mt9m114_ifp_get_frame_interval()
and mt9m114_ifp_set_frame_interval().
[1] drivers/media/v4l2-core/v4l2-subdev.c - line 1129
[2] Documentation/driver-api/media/v4l2-subdev.rst
Fixes: 24d756e914fc ("media: i2c: Add driver for onsemi MT9M114 camera sensor")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathis Foerst <mathis.foerst(a)mt.com>
---
drivers/media/i2c/mt9m114.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/media/i2c/mt9m114.c b/drivers/media/i2c/mt9m114.c
index e909c1227e51..9ff46c72dbc1 100644
--- a/drivers/media/i2c/mt9m114.c
+++ b/drivers/media/i2c/mt9m114.c
@@ -1652,13 +1652,9 @@ static int mt9m114_ifp_get_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
ival->numerator = 1;
ival->denominator = sensor->ifp.frame_rate;
- mutex_unlock(sensor->ifp.hdl.lock);
-
return 0;
}
@@ -1677,8 +1673,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
if (ival->numerator != 0 && ival->denominator != 0)
sensor->ifp.frame_rate = min_t(unsigned int,
ival->denominator / ival->numerator,
@@ -1692,8 +1686,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (sensor->streaming)
ret = mt9m114_set_frame_rate(sensor);
- mutex_unlock(sensor->ifp.hdl.lock);
-
return ret;
}
--
2.34.1
This is a note to let you know that I've just added the patch titled
iio: adc: ti-ads1298: Kconfig: add kfifo dependency to fix module
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 3c5dfea39a245b2dad869db24e2830aa299b1cf2 Mon Sep 17 00:00:00 2001
From: Arthur-Prince <r2.arthur.prince(a)gmail.com>
Date: Wed, 30 Apr 2025 16:07:37 -0300
Subject: iio: adc: ti-ads1298: Kconfig: add kfifo dependency to fix module
build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add dependency to Kconfig’s ti-ads1298 because compiling it as a module
failed with an undefined kfifo symbol.
Fixes: 00ef7708fa60 ("iio: adc: ti-ads1298: Add driver")
Signed-off-by: Arthur-Prince <r2.arthur.prince(a)gmail.com>
Co-developed-by: Mariana Valério <mariana.valerio2(a)hotmail.com>
Signed-off-by: Mariana Valério <mariana.valerio2(a)hotmail.com>
Link: https://patch.msgid.link/20250430191131.120831-1-r2.arthur.prince@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index ad06cf556785..0fe6601e59ed 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -1562,6 +1562,7 @@ config TI_ADS1298
tristate "Texas Instruments ADS1298"
depends on SPI
select IIO_BUFFER
+ select IIO_KFIFO_BUF
help
If you say yes here you get support for Texas Instruments ADS1298
medical ADC chips
--
2.49.0
This is a note to let you know that I've just added the patch titled
iio: adc: ti-ads1298: Kconfig: add kfifo dependency to fix module
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 3c5dfea39a245b2dad869db24e2830aa299b1cf2 Mon Sep 17 00:00:00 2001
From: Arthur-Prince <r2.arthur.prince(a)gmail.com>
Date: Wed, 30 Apr 2025 16:07:37 -0300
Subject: iio: adc: ti-ads1298: Kconfig: add kfifo dependency to fix module
build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add dependency to Kconfig’s ti-ads1298 because compiling it as a module
failed with an undefined kfifo symbol.
Fixes: 00ef7708fa60 ("iio: adc: ti-ads1298: Add driver")
Signed-off-by: Arthur-Prince <r2.arthur.prince(a)gmail.com>
Co-developed-by: Mariana Valério <mariana.valerio2(a)hotmail.com>
Signed-off-by: Mariana Valério <mariana.valerio2(a)hotmail.com>
Link: https://patch.msgid.link/20250430191131.120831-1-r2.arthur.prince@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index ad06cf556785..0fe6601e59ed 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -1562,6 +1562,7 @@ config TI_ADS1298
tristate "Texas Instruments ADS1298"
depends on SPI
select IIO_BUFFER
+ select IIO_KFIFO_BUF
help
If you say yes here you get support for Texas Instruments ADS1298
medical ADC chips
--
2.49.0
Hi,
commit 959cadf09dbae7b304f03e039b8d8e13c529e2dd
Author: Peter Zijlstra <peterz(a)infradead.org>
Date: Mon Oct 14 10:05:48 2024 -0700
x86/its: Use dynamic thunks for indirect branches
commit 872df34d7c51a79523820ea6a14860398c639b87 upstream.
was ported at v6.1.139 and leads to kernel crashes there after module
unload operations.
Example trace:
BUG: unable to handle page fault for address: ffff8fcb47dd4000
#PF: supervisor write access in kernel mode
#PF: error_code(0x0003) - permissions violation
PGD 34801067 P4D 34801067 PUD 100148063 PMD 107dd5063 PTE 8000000107dd4161
Oops: 0003 [#1] PREEMPT SMP NOPTI
CPU: 3 PID: 378 Comm: modprobe Not tainted 6.1.139-01446-g753bd4a5f9a9 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:__change_page_attr_set_clr+0x49d/0x1280
Call Trace:
<TASK>
? free_unref_page_prepare+0x80/0x4b0
set_direct_map_invalid_noflush+0x6e/0xa0
__vunmap+0x18c/0x3e0
__vfree+0x21/0xb0
vfree+0x2b/0x90
module_memfree+0x1b/0x50
free_module+0x17c/0x250
__do_sys_delete_module+0x337/0x4b0
__x64_sys_delete_module+0x15/0x30
x64_sys_call+0x3f9a/0x43a0
do_syscall_64+0x33/0x80
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
RIP: 0033:0x7f70755c0807
</TASK>
Modules linked in: dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua [last unloaded: scsi_debug]
As mentioned in the blamed patch comment describing the backport
adaptations:
[ pawan: CONFIG_EXECMEM and CONFIG_EXECMEM_ROX are not supported on
backport kernel, made changes to use module_alloc() and
set_memory_*() for dynamic thunks. ]
module_alloc/module_memfree in conjunction with memory protection routines
were used. The allocated memory is vmalloc-based, and it ends up being ROX
upon release inside its_free_mod().
Freeing of special permissioned memory in vmalloc requires its own
handling. VM_FLUSH_RESET_PERMS flag was introduced for these purposes.
In-kernel users dealing with the stuff had to care about this explicitly
before commit 4c4eb3ecc91f ("x86/modules: Set VM_FLUSH_RESET_PERMS in
module_alloc()").
More recent kernels starting from 6.2 have the commit and are not affected.
So port it as a followup for ITS mitigation 6.1-series to fix the
aforementioned failures.
The problem concerns 5.15-series (currently in stable-queue) as well. It
needs its own patch to apply cleanly. Will send it shortly, too.
Found by Linux Verification Center (linuxtesting.org).
Thomas Gleixner (1):
x86/modules: Set VM_FLUSH_RESET_PERMS in module_alloc()
arch/x86/kernel/ftrace.c | 2 --
arch/x86/kernel/kprobes/core.c | 1 -
arch/x86/kernel/module.c | 9 +++++----
3 files changed, 5 insertions(+), 7 deletions(-)
--
2.49.0
The xHC resources allocated for USB devices are not released in correct
order after resuming in case when while suspend device was reconnected.
This issue has been detected during the fallowing scenario:
- connect hub HS to root port
- connect LS/FS device to hub port
- wait for enumeration to finish
- force host to suspend
- reconnect hub attached to root port
- wake host
For this scenario during enumeration of USB LS/FS device the Cadence xHC
reports completion error code for xHC commands because the xHC resources
used for devices has not been properly released.
XHCI specification doesn't mention that device can be reset in any order
so, we should not treat this issue as Cadence xHC controller bug.
Similar as during disconnecting in this case the device resources should
be cleared starting form the last usb device in tree toward the root hub.
To fix this issue usbcore driver should call hcd->driver->reset_device
for all USB devices connected to hub which was reconnected while
suspending.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v3:
- Changed patch title
- Corrected typo
- Moved hub_hc_release_resources above mutex_lock(hcd->address0_mutex)
v2:
- Replaced disconnection procedure with releasing only the xHC resources
drivers/usb/core/hub.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a76bb50b6202..dcba4281ea48 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -6065,6 +6065,36 @@ void usb_hub_cleanup(void)
usb_deregister(&hub_driver);
} /* usb_hub_cleanup() */
+/**
+ * hub_hc_release_resources - clear resources used by host controller
+ * @udev: pointer to device being released
+ *
+ * Context: task context, might sleep
+ *
+ * Function releases the host controller resources in correct order before
+ * making any operation on resuming usb device. The host controller resources
+ * allocated for devices in tree should be released starting from the last
+ * usb device in tree toward the root hub. This function is used only during
+ * resuming device when usb device require reinitialization – that is, when
+ * flag udev->reset_resume is set.
+ *
+ * This call is synchronous, and may not be used in an interrupt context.
+ */
+static void hub_hc_release_resources(struct usb_device *udev)
+{
+ struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+ int i;
+
+ /* Release up resources for all children before this device */
+ for (i = 0; i < udev->maxchild; i++)
+ if (hub->ports[i]->child)
+ hub_hc_release_resources(hub->ports[i]->child);
+
+ if (hcd->driver->reset_device)
+ hcd->driver->reset_device(hcd, udev);
+}
+
/**
* usb_reset_and_verify_device - perform a USB port reset to reinitialize a device
* @udev: device to reset (not in SUSPENDED or NOTATTACHED state)
@@ -6129,6 +6159,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
bos = udev->bos;
udev->bos = NULL;
+ if (udev->reset_resume)
+ hub_hc_release_resources(udev);
+
mutex_lock(hcd->address0_mutex);
for (i = 0; i < PORT_INIT_TRIES; ++i) {
--
2.43.0
Developers are indeed hitting other of the `noreturn` slice symbols in
Nova [1], thus relax the last check in the list so that we catch all of
them, i.e.
*_4core5slice5index22slice_index_order_fail
*_4core5slice5index24slice_end_index_len_fail
*_4core5slice5index26slice_start_index_len_fail
*_4core5slice5index29slice_end_index_overflow_fail
*_4core5slice5index31slice_start_index_overflow_fail
These all exist since at least Rust 1.78.0, thus backport it too.
See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions")
for more details.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later.
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Timur Tabi <ttabi(a)nvidia.com>
Cc: Kane York <kanepyork(a)gmail.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Reported-by: Joel Fernandes <joelagnelf(a)nvidia.com>
Link: https://lore.kernel.org/rust-for-linux/20250513180757.GA1295002@joelnvbox/ [1]
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
---
I tested it with the Timur's `alex` branch, but a Tested-by is appreciated.
Thanks!
tools/objtool/check.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b21b12ec88d9..f23bdda737aa 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -230,7 +230,8 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_7___rustc17rust_begin_unwind") ||
strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
- (strstr(func->name, "_4core5slice5index24slice_") &&
+ (strstr(func->name, "_4core5slice5index") &&
+ strstr(func->name, "slice_") &&
str_ends_with(func->name, "_fail"));
}
base-commit: a5806cd506af5a7c19bcd596e4708b5c464bfd21
--
2.49.0