- Linux-stable-mirror - lists.linaro.org

[PATCH 6.1] net: add netdev_lockdep_set_classes() to virtual drivers

by Sumanth Gavini

commit 0bef512012b1cd8820f0c9ec80e5f8ceb43fdd59 upstream. Based on a syzbot report, it appears many virtual drivers do not yet use netdev_lockdep_set_classes(), triggering lockdep false positives. WARNING: possible recursive locking detected 6.8.0-rc4-next-20240212-syzkaller #0 Not tainted syz-executor.0/19016 is trying to acquire lock: ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 but task is already holding lock: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 other info that might help us debug this: Possible unsafe locking scenario: CPU0 lock(_xmit_ETHER#2); lock(_xmit_ETHER#2); *** DEADLOCK *** May be due to missing lock nesting notation 9 locks held by syz-executor.0/19016: #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock net/core/rtnetlink.c:79 [inline] #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x82c/0x1040 net/core/rtnetlink.c:6603 #1: ffffc90000a08c00 ((&in_dev->mr_ifc_timer)){+.-.}-{0:0}, at: call_timer_fn+0xc0/0x600 kernel/time/timer.c:1697 #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228 #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline] #3: ffffffff8e131580 
(rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline] #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284 #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325 #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228 #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline] #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline] #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284 #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline] #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline] #8: 
ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline] #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325 stack backtrace: CPU: 1 PID: 19016 Comm: syz-executor.0 Not tainted 6.8.0-rc4-next-20240212-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_deadlock kernel/locking/lockdep.c:3062 [inline] validate_chain+0x15c1/0x58e0 kernel/locking/lockdep.c:3856 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e4/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __netif_tx_lock include/linux/netdevice.h:4452 [inline] sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 __dev_xmit_skb net/core/dev.c:3784 [inline] __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235 iptunnel_xmit+0x540/0x9b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x20ee/0x2960 net/ipv4/ip_tunnel.c:831 erspan_xmit+0x9de/0x1460 net/ipv4/ip_gre.c:720 __netdev_start_xmit include/linux/netdevice.h:4989 [inline] netdev_start_xmit include/linux/netdevice.h:5003 [inline] xmit_one net/core/dev.c:3555 [inline] dev_hard_start_xmit+0x242/0x770 net/core/dev.c:3571 sch_direct_xmit+0x2b6/0x5f0 net/sched/sch_generic.c:342 __dev_xmit_skb net/core/dev.c:3784 [inline] __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235 igmpv3_send_cr net/ipv4/igmp.c:723 [inline] igmp_ifc_timer_expire+0xb71/0xd90 net/ipv4/igmp.c:813 
call_timer_fn+0x17e/0x600 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x621/0x830 kernel/time/timer.c:2038 run_timer_softirq+0x67/0xf0 kernel/time/timer.c:2051 __do_softirq+0x2bc/0x943 kernel/softirq.c:554 invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf2/0x1c0 kernel/softirq.c:633 irq_exit_rcu+0x9/0x30 kernel/softirq.c:645 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1076 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1076 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 RIP: 0010:resched_offsets_ok kernel/sched/core.c:10127 [inline] RIP: 0010:__might_resched+0x16f/0x780 kernel/sched/core.c:10142 Code: 00 4c 89 e8 48 c1 e8 03 48 ba 00 00 00 00 00 fc ff df 48 89 44 24 38 0f b6 04 10 84 c0 0f 85 87 04 00 00 41 8b 45 00 c1 e0 08 <01> d8 44 39 e0 0f 85 d6 00 00 00 44 89 64 24 1c 48 8d bc 24 a0 00 RSP: 0018:ffffc9000ee069e0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8880296a9e00 RDX: dffffc0000000000 RSI: ffff8880296a9e00 RDI: ffffffff8bfe8fa0 RBP: ffffc9000ee06b00 R08: ffffffff82326877 R09: 1ffff11002b5ad1b R10: dffffc0000000000 R11: ffffed1002b5ad1c R12: 0000000000000000 R13: ffff8880296aa23c R14: 000000000000062a R15: 1ffff92001dc0d44 down_write+0x19/0x50 kernel/locking/rwsem.c:1578 kernfs_activate fs/kernfs/dir.c:1403 [inline] kernfs_add_one+0x4af/0x8b0 fs/kernfs/dir.c:819 __kernfs_create_file+0x22e/0x2e0 fs/kernfs/file.c:1056 sysfs_add_file_mode_ns+0x24a/0x310 fs/sysfs/file.c:307 create_files fs/sysfs/group.c:64 [inline] internal_create_group+0x4f4/0xf20 fs/sysfs/group.c:152 internal_create_groups fs/sysfs/group.c:192 [inline] sysfs_create_groups+0x56/0x120 fs/sysfs/group.c:218 create_dir lib/kobject.c:78 [inline] kobject_add_internal+0x472/0x8d0 lib/kobject.c:240 kobject_add_varg lib/kobject.c:374 [inline] kobject_init_and_add+0x124/0x190 lib/kobject.c:457 netdev_queue_add_kobject 
net/core/net-sysfs.c:1706 [inline] netdev_queue_update_kobjects+0x1f3/0x480 net/core/net-sysfs.c:1758 register_queue_kobjects net/core/net-sysfs.c:1819 [inline] netdev_register_kobject+0x265/0x310 net/core/net-sysfs.c:2059 register_netdevice+0x1191/0x19c0 net/core/dev.c:10298 bond_newlink+0x3b/0x90 drivers/net/bonding/bond_netlink.c:576 rtnl_newlink_create net/core/rtnetlink.c:3506 [inline] __rtnl_newlink net/core/rtnetlink.c:3726 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3739 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6606 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3c/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2199 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fc3fa87fa9c Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: Eric Dumazet <edumazet(a)google.com> Link: https://lore.kernel.org/r/20240212140700.2795436-4-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> Signed-off-by: Sumanth Gavini <sumanth.gavini(a)yahoo.com> --- drivers/net/dummy.c | 1 + drivers/net/geneve.c | 2 ++ drivers/net/loopback.c | 2 ++ drivers/net/veth.c | 1 + drivers/net/vxlan/vxlan_core.c | 1 + net/ipv4/ip_tunnel.c | 1 + net/ipv6/ip6_gre.c | 2 ++ net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6_vti.c | 1 + net/ipv6/sit.c | 1 + 10 files changed, 13 insertions(+) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index aa0fc00faecb..f05d4194eb09 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -71,6 +71,7 @@ static int dummy_dev_init(struct net_device *dev) if (!dev->lstats) return -ENOMEM; + 
netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f393e454f45c..d2fbd1cd0ce3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -335,6 +335,8 @@ static int geneve_init(struct net_device *dev) gro_cells_destroy(&geneve->gro_cells); return err; } + + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 2e9742952c4e..ab5264ddd765 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -144,6 +144,8 @@ static int loopback_dev_init(struct net_device *dev) dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 09682ea3354e..00ad39dc297e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1391,6 +1391,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); + netdev_lockdep_set_classes(dev); kfree(priv->rq); } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 6ab669dcd1c6..a233ae4bf61f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2926,6 +2926,7 @@ static int vxlan_init(struct net_device *dev) return err; } + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 019f3b0839c5..20e35fdb373d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1253,6 +1253,7 @@ int ip_tunnel_init(struct net_device *dev) if (tunnel->collect_md) netif_keep_dst(dev); + netdev_lockdep_set_classes(dev); return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c035a96fba3a..419e94e9bd62 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1530,6 +1530,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) ip6gre_tnl_init_features(dev); netdev_hold(dev, 
&tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: @@ -1922,6 +1923,7 @@ static int ip6erspan_tap_init(struct net_device *dev) ip6erspan_tnl_link_config(tunnel, 1); netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2fb4c6ad7243..96030f29df9b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1885,6 +1885,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; destroy_dst: diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 151337d7f67b..f6a2c55a4dcb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -937,6 +937,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) if (!dev->tstats) return -ENOMEM; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5703d3cbea9b..c2bf8eb81a58 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1458,6 +1458,7 @@ static int ipip6_tunnel_init(struct net_device *dev) return err; } netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; } -- 2.43.0

1 month

2
3
0 0

[PATCH 5.10.y] comedi: das16m1: Fix bit shift out of bounds

by Ian Abbott

[ Upstream commit ed93c6f68a3be06e4e0c331c6e751f462dee3932 ] When checking for a supported IRQ number, the following test is used: /* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */ if ((1 << it->options[1]) & 0xdcfc) { However, `it->options[i]` is an unchecked `int` value from userspace, so the shift amount could be negative or out of bounds. Fix the test by requiring `it->options[1]` to be within bounds before proceeding with the original test. Reported-by: syzbot+c52293513298e0fd9a94(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c52293513298e0fd9a94 Fixes: 729988507680 ("staging: comedi: das16m1: tidy up the irq support in das16m1_attach()") Tested-by: syzbot+c52293513298e0fd9a94(a)syzkaller.appspotmail.com Suggested-by: "Enju, Kohei" <enjuk(a)amazon.co.jp> Cc: stable(a)vger.kernel.org # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250707130908.70758-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/staging/comedi/drivers/das16m1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/das16m1.c b/drivers/staging/comedi/drivers/das16m1.c index 75f3dbbe97ac..0d54387a1c26 100644 --- a/drivers/staging/comedi/drivers/das16m1.c +++ b/drivers/staging/comedi/drivers/das16m1.c @@ -523,7 +523,8 @@ static int das16m1_attach(struct comedi_device *dev, devpriv->extra_iobase = dev->iobase + DAS16M1_8255_IOBASE; /* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */ - if ((1 << it->options[1]) & 0xdcfc) { + if (it->options[1] >= 2 && it->options[1] <= 15 && + (1 << it->options[1]) & 0xdcfc) { ret = request_irq(it->options[1], das16m1_interrupt, 0, dev->board_name, dev); if (ret == 0) -- 2.47.2

1 month

2
1
0 0

[PATCH 5.10.y] comedi: das6402: Fix bit shift out of bounds

by Ian Abbott

[ Upstream commit 70f2b28b5243df557f51c054c20058ae207baaac ] When checking for a supported IRQ number, the following test is used: /* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */ if ((1 << it->options[1]) & 0x8cec) { However, `it->options[i]` is an unchecked `int` value from userspace, so the shift amount could be negative or out of bounds. Fix the test by requiring `it->options[1]` to be within bounds before proceeding with the original test. Valid `it->options[1]` values that select the IRQ will be in the range [1,15]. The value 0 explicitly disables the use of interrupts. Fixes: 79e5e6addbb1 ("staging: comedi: das6402: rewrite broken driver") Cc: stable(a)vger.kernel.org # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250707135737.77448-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/staging/comedi/drivers/das6402.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/das6402.c b/drivers/staging/comedi/drivers/das6402.c index 96f4107b8054..927d4b832ecc 100644 --- a/drivers/staging/comedi/drivers/das6402.c +++ b/drivers/staging/comedi/drivers/das6402.c @@ -569,7 +569,8 @@ static int das6402_attach(struct comedi_device *dev, das6402_reset(dev); /* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */ - if ((1 << it->options[1]) & 0x8cec) { + if (it->options[1] > 0 && it->options[1] < 16 && + (1 << it->options[1]) & 0x8cec) { ret = request_irq(it->options[1], das6402_interrupt, 0, dev->board_name, dev); if (ret == 0) { -- 2.47.2

1 month

2
1
0 0

[PATCH 5.10.y] comedi: Fix initialization of data for instructions that write to subdevice

by Ian Abbott

[ Upstream commit 46d8c744136ce2454aa4c35c138cc06817f92b8e ] Some Comedi subdevice instruction handlers are known to access instruction data elements beyond the first `insn->n` elements in some cases. The `do_insn_ioctl()` and `do_insnlist_ioctl()` functions allocate at least `MIN_SAMPLES` (16) data elements to deal with this, but they do not initialize all of that. For Comedi instruction codes that write to the subdevice, the first `insn->n` data elements are copied from user-space, but the remaining elements are left uninitialized. That could be a problem if the subdevice instruction handler reads the uninitialized data. Ensure that the first `MIN_SAMPLES` elements are initialized before calling these instruction handlers, filling the uncopied elements with 0. For `do_insnlist_ioctl()`, the same data buffer elements are used for handling a list of instructions, so ensure the first `MIN_SAMPLES` elements are initialized for each instruction that writes to the subdevice. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: stable(a)vger.kernel.org # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250707161439.88385-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/staging/comedi/comedi_fops.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 5aa6a84d1fa6..96d68cc8f449 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1551,21 +1551,27 @@ static int do_insnlist_ioctl(struct comedi_device *dev, } for (i = 0; i < n_insns; ++i) { + unsigned int n = insns[i].n; + if (insns[i].insn & INSN_MASK_WRITE) { if (copy_from_user(data, insns[i].data, - insns[i].n * sizeof(unsigned int))) { + n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_from_user failed\n"); ret = -EFAULT; goto error; } + if (n < MIN_SAMPLES) { + 
memset(&data[n], 0, (MIN_SAMPLES - n) * + sizeof(unsigned int)); + } } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) goto error; if (insns[i].insn & INSN_MASK_READ) { if (copy_to_user(insns[i].data, data, - insns[i].n * sizeof(unsigned int))) { + n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_to_user failed\n"); ret = -EFAULT; @@ -1638,6 +1644,10 @@ static int do_insn_ioctl(struct comedi_device *dev, ret = -EFAULT; goto error; } + if (insn->n < MIN_SAMPLES) { + memset(&data[insn->n], 0, + (MIN_SAMPLES - insn->n) * sizeof(unsigned int)); + } } ret = parse_insn(dev, insn, data, file); if (ret < 0) -- 2.47.2

1 month

2
1
0 0

[PATCH 5.10.y] comedi: Fail COMEDI_INSNLIST ioctl if n_insns is too large

by Ian Abbott

[ Upstream commit 08ae4b20f5e82101d77326ecab9089e110f224cc ] The handling of the `COMEDI_INSNLIST` ioctl allocates a kernel buffer to hold the array of `struct comedi_insn`, getting the length from the `n_insns` member of the `struct comedi_insnlist` supplied by the user. The allocation will fail with a WARNING and a stack dump if it is too large. Avoid that by failing with an `-EINVAL` error if the supplied `n_insns` value is unreasonable. Define the limit on the `n_insns` value in the `MAX_INSNS` macro. Set this to the same value as `MAX_SAMPLES` (65536), which is the maximum allowed sum of the values of the member `n` in the array of `struct comedi_insn`, and sensible comedi instructions will have an `n` of at least 1. Reported-by: syzbot+d6995b62e5ac7d79557a(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d6995b62e5ac7d79557a Fixes: ed9eccbe8970 ("Staging: add comedi core") Tested-by: Ian Abbott <abbotti(a)mev.co.uk> Cc: stable(a)vger.kernel.org # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250704120405.83028-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/staging/comedi/comedi_fops.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 8f896e6208a8..5aa6a84d1fa6 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1584,6 +1584,16 @@ static int do_insnlist_ioctl(struct comedi_device *dev, return i; } +#define MAX_INSNS MAX_SAMPLES +static int check_insnlist_len(struct comedi_device *dev, unsigned int n_insns) +{ + if (n_insns > MAX_INSNS) { + dev_dbg(dev->class_dev, "insnlist length too large\n"); + return -EINVAL; + } + return 0; +} + /* * COMEDI_INSN ioctl * synchronous instruction @@ -2234,6 +2244,9 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd, rc = -EFAULT; break; } + rc = 
check_insnlist_len(dev, insnlist.n_insns); + if (rc) + break; insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) { rc = -ENOMEM; @@ -3085,6 +3098,9 @@ static int compat_insnlist(struct file *file, unsigned long arg) if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32))) return -EFAULT; + rc = check_insnlist_len(dev, insnlist32.n_insns); + if (rc) + return rc; insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) return -ENOMEM; -- 2.47.2

1 month

2
2
0 0

[PATCH 5.10.y] comedi: comedi_test: Fix possible deletion of uninitialized timers

by Ian Abbott

[ Upstream commit 1b98304c09a0192598d0767f1eb8c83d7e793091 ] In `waveform_common_attach()`, the two timers `&devpriv->ai_timer` and `&devpriv->ao_timer` are initialized after the allocation of the device private data by `comedi_alloc_devpriv()` and the subdevices by `comedi_alloc_subdevices()`. The function may return with an error between those function calls. In that case, `waveform_detach()` will be called by the Comedi core to clean up. The check that `waveform_detach()` uses to decide whether to delete the timers is incorrect. It only checks that the device private data was allocated, but that does not guarantee that the timers were initialized. It also needs to check that the subdevices were allocated. Fix it. Fixes: 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up") Cc: stable(a)vger.kernel.org # 6.15+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250708130627.21743-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [ changed timer_delete_sync() to del_timer_sync() ] Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> --- drivers/staging/comedi/drivers/comedi_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c index bea9a3adf08c..f5199474c0e9 100644 --- a/drivers/staging/comedi/drivers/comedi_test.c +++ b/drivers/staging/comedi/drivers/comedi_test.c @@ -790,7 +790,7 @@ static void waveform_detach(struct comedi_device *dev) { struct waveform_private *devpriv = dev->private; - if (devpriv) { + if (devpriv && dev->n_subdevices) { del_timer_sync(&devpriv->ai_timer); del_timer_sync(&devpriv->ao_timer); } -- 2.47.2

1 month

2
1
0 0

[PATCH 5.4.y] comedi: pcl812: Fix bit shift out of bounds

by Ian Abbott

[ Upstream commit b14b076ce593f72585412fc7fd3747e03a5e3632 ] When checking for a supported IRQ number, the following test is used: if ((1 << it->options[1]) & board->irq_bits) { However, `it->options[i]` is an unchecked `int` value from userspace, so the shift amount could be negative or out of bounds. Fix the test by requiring `it->options[1]` to be within bounds before proceeding with the original test. Valid `it->options[1]` values that select the IRQ will be in the range [1,15]. The value 0 explicitly disables the use of interrupts. Reported-by: syzbot+32de323b0addb9e114ff(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=32de323b0addb9e114ff Fixes: fcdb427bc7cf ("Staging: comedi: add pcl821 driver") Cc: stable(a)vger.kernel.org # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> Link: https://lore.kernel.org/r/20250707133429.73202-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/staging/comedi/drivers/pcl812.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/pcl812.c b/drivers/staging/comedi/drivers/pcl812.c index aefc1b849cf7..98112c79e2d7 100644 --- a/drivers/staging/comedi/drivers/pcl812.c +++ b/drivers/staging/comedi/drivers/pcl812.c @@ -1151,7 +1151,8 @@ static int pcl812_attach(struct comedi_device *dev, struct comedi_devconfig *it) if (!dev->pacer) return -ENOMEM; - if ((1 << it->options[1]) & board->irq_bits) { + if (it->options[1] > 0 && it->options[1] < 16 && + (1 << it->options[1]) & board->irq_bits) { ret = request_irq(it->options[1], pcl812_interrupt, 0, dev->board_name, dev); if (ret == 0) -- 2.47.2

1 month

2
1
0 0

[PATCH net] net: phy: micrel: fix KSZ8081/KSZ8091 cable test

by Florian Larysch

Commit 21b688dabecb ("net: phy: micrel: Cable Diag feature for lan8814 phy") introduced cable_test support for the LAN8814 that reuses parts of the KSZ886x logic and introduced the cable_diag_reg and pair_mask parameters to account for differences between those chips. However, it did not update the ksz8081_type struct, so those members are now 0, causing no pairs to be tested in ksz886x_cable_test_get_status and ksz886x_cable_test_wait_for_completion to poll the wrong register for the affected PHYs (Basic Control/Reset, which is 0 in normal operation) and exit immediately. Fix this by setting both struct members accordingly. Fixes: 21b688dabecb ("net: phy: micrel: Cable Diag feature for lan8814 phy") Cc: stable(a)vger.kernel.org Signed-off-by: Florian Larysch <fl(a)n621.de> --- drivers/net/phy/micrel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 64aa03aed770..50c6a4e8cfa1 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -472,6 +472,8 @@ static const struct kszphy_type ksz8051_type = { static const struct kszphy_type ksz8081_type = { .led_mode_reg = MII_KSZPHY_CTRL_2, + .cable_diag_reg = KSZ8081_LMD, + .pair_mask = KSZPHY_WIRE_PAIR_MASK, .has_broadcast_disable = true, .has_nand_tree_disable = true, .has_rmii_ref_clk_sel = true, -- 2.50.1

1 month

2
1
0 0

[PATCH net-next] net: ipv4: allow directed broadcast routes to use dst hint

by Oscar Maes

Currently, ip_extract_route_hint uses RTN_BROADCAST to decide whether to use the route dst hint mechanism. This check is too strict, as it prevents directed broadcast routes from using the hint, resulting in poor performance during bursts of directed broadcast traffic. Fix this in ip_extract_route_hint and modify ip_route_use_hint to preserve the intended behaviour. Signed-off-by: Oscar Maes <oscmaes92(a)gmail.com> --- net/ipv4/ip_input.c | 6 ++++-- net/ipv4/route.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fc323994b..1581b98bc 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -589,8 +589,10 @@ static void ip_sublist_rcv_finish(struct list_head *head) static struct sk_buff *ip_extract_route_hint(const struct net *net, struct sk_buff *skb, int rt_type) { - if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST || - IPCB(skb)->flags & IPSKB_MULTIPATH) + const struct iphdr *iph = ip_hdr(skb); + + if (fib4_has_custom_rules(net) || ipv4_is_lbcast(iph->daddr) || + (iph->daddr == 0 && iph->saddr == 0) || IPCB(skb)->flags & IPSKB_MULTIPATH) return NULL; return skb; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f639a2ae8..1f212b2ce 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2210,7 +2210,7 @@ ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_source; } - if (rt->rt_type != RTN_LOCAL) + if (!(rt->rt_flags & RTCF_LOCAL)) goto skip_validate_source; reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, -- 2.39.5

1 month

3
2
0 0

[PATCH v2 2/4] riscv: use lw when reading int cpu in asm_per_cpu

by Radim Krčmář

REG_L is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: be97d0db5f44 ("riscv: VMAP_STACK overflow detection thread-safe") Cc: <stable(a)vger.kernel.org> Reviewed-by: Alexandre Ghiti <alexghiti(a)rivosinc.com> Signed-off-by: Radim Krčmář <rkrcmar(a)ventanamicro.com> --- v2: split for stable [Alex] --- arch/riscv/include/asm/asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index a8a2af6dfe9d..2a16e88e13de 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -91,7 +91,7 @@ #endif .macro asm_per_cpu dst sym tmp - REG_L \tmp, TASK_TI_CPU_NUM(tp) + lw \tmp, TASK_TI_CPU_NUM(tp) slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT la \dst, __per_cpu_offset add \dst, \dst, \tmp -- 2.50.0

1 month

1
0
0 0

[PATCH v2 1/4] riscv: use lw when reading int cpu in new_vmalloc_check

by Radim Krčmář

REG_L is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: 503638e0babf ("riscv: Stop emitting preventive sfence.vma for new vmalloc mappings") Cc: <stable(a)vger.kernel.org> Reviewed-by: Alexandre Ghiti <alexghiti(a)rivosinc.com> Signed-off-by: Radim Krčmář <rkrcmar(a)ventanamicro.com> --- v2: split for stable [Alex] --- arch/riscv/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 75656afa2d6b..4fdf187a62bf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -46,7 +46,7 @@ * a0 = &new_vmalloc[BIT_WORD(cpu)] * a1 = BIT_MASK(cpu) */ - REG_L a2, TASK_TI_CPU(tp) + lw a2, TASK_TI_CPU(tp) /* * Compute the new_vmalloc element position: * (cpu / 64) * 8 = (cpu >> 6) << 3 -- 2.50.0

1 month

1
0
0 0

[PATCH] ALSA: hda/realtek - Fix mute LED for HP Victus 16-r1xxx

by edip＠medip.dev

From: Edip Hazuri <edip(a)medip.dev> The mute LED on this laptop is using ALC245 but requires a quirk to work. This patch enables the existing quirk for the device. Tested on Victus 16-r1xxx Laptop. The LED behaviour works as intended. v2: - adapt the HD-audio code changes and rebase on for-next branch of tiwai/sound.git - link to v1: https://lore.kernel.org/linux-sound/20250724210756.61453-2-edip@medip.dev/ Cc: <stable(a)vger.kernel.org> Signed-off-by: Edip Hazuri <edip(a)medip.dev> --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 05019fa73..33ef08d25 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6580,6 +6580,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), -- 2.50.1

1 month

2
1
0 0

[PATCH] ALSA: hda/realtek - Fix mute LED for HP Victus 16-r1xxx

by edip＠medip.dev

From: Edip Hazuri <edip(a)medip.dev> The mute LED on this laptop is using ALC245 but requires a quirk to work. This patch enables the existing quirk for the device. Tested on Victus 16-r1xxx Laptop. The LED behaviour works as intended. Cc: <stable(a)vger.kernel.org> Signed-off-by: Edip Hazuri <edip(a)medip.dev> --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2627e2f49..9656e6ebb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10874,6 +10874,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), -- 2.50.1

1 month

3
2
0 0

[PATCH] comedi: Make insn_rw_emulate_bits() do insn->n samples

by Ian Abbott

The `insn_rw_emulate_bits()` function is used as a default handler for `INSN_READ` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_READ`. Similarly, it is used as a default handler for `INSN_WRITE` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_WRITE`. It works by emulating the `INSN_READ` or `INSN_WRITE` instruction handling with a constructed `INSN_BITS` instruction. However, `INSN_READ` and `INSN_WRITE` instructions are supposed to be able to read or write multiple samples, indicated by the `insn->n` value, but `insn_rw_emulate_bits()` currently only handles a single sample. For `INSN_READ`, the comedi core will copy `insn->n` samples back to user-space. (That triggered KMSAN kernel-infoleak errors when `insn->n` was greater than 1, but that is being fixed more generally elsewhere in the comedi core.) Make `insn_rw_emulate_bits()` either handle `insn->n` samples, or return an error, to conform to the general expectation for `INSN_READ` and `INSN_WRITE` handlers. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: <stable(a)vger.kernel.org> # 5.13+ Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> --- For 5.4.y and 5.10.y, this patch conflicts with submitted patches for upstream commit e9cb26291d00 ("comedi: Fix use of uninitialized data in insn_rw_emulate_bits()"). --- drivers/comedi/drivers.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index f1dc854928c1..c9ebaadc5e82 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c @@ -620,11 +620,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, unsigned int chan = CR_CHAN(insn->chanspec); unsigned int base_chan = (chan < 32) ? 
0 : chan; unsigned int _data[2]; + unsigned int i; int ret; - if (insn->n == 0) - return 0; - memset(_data, 0, sizeof(_data)); memset(&_insn, 0, sizeof(_insn)); _insn.insn = INSN_BITS; @@ -635,18 +633,21 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, if (insn->insn == INSN_WRITE) { if (!(s->subdev_flags & SDF_WRITABLE)) return -EINVAL; - _data[0] = 1U << (chan - base_chan); /* mask */ - _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */ + _data[0] = 1U << (chan - base_chan); /* mask */ } + for (i = 0; i < insn->n; i++) { + if (insn->insn == INSN_WRITE) + _data[1] = data[i] ? _data[0] : 0; /* bits */ - ret = s->insn_bits(dev, s, &_insn, _data); - if (ret < 0) - return ret; + ret = s->insn_bits(dev, s, &_insn, _data); + if (ret < 0) + return ret; - if (insn->insn == INSN_READ) - data[0] = (_data[1] >> (chan - base_chan)) & 1; + if (insn->insn == INSN_READ) + data[i] = (_data[1] >> (chan - base_chan)) & 1; + } - return 1; + return insn->n; } static int __comedi_device_postconfig_async(struct comedi_device *dev, -- 2.47.2

1 month

1
0
0 0

[PATCH] scsi: fill in DMA padding bytes in scsi_alloc_sgtables

by Petr Vaganov

During fuzz testing, the following issue was discovered: BUG: KMSAN: uninit-value in __dma_map_sg_attrs+0x217/0x310 __dma_map_sg_attrs+0x217/0x310 dma_map_sg_attrs+0x4a/0x70 ata_qc_issue+0x9f8/0x1420 __ata_scsi_queuecmd+0x1657/0x1740 ata_scsi_queuecmd+0x79a/0x920 scsi_queue_rq+0x4472/0x4f40 blk_mq_dispatch_rq_list+0x1cca/0x3ee0 __blk_mq_sched_dispatch_requests+0x458/0x630 blk_mq_sched_dispatch_requests+0x15b/0x340 __blk_mq_run_hw_queue+0xe5/0x250 __blk_mq_delay_run_hw_queue+0x138/0x780 blk_mq_run_hw_queue+0x4bb/0x7e0 blk_mq_sched_insert_request+0x2a7/0x4c0 blk_execute_rq+0x497/0x8a0 sg_io+0xbe0/0xe20 scsi_ioctl+0x2b36/0x3c60 sr_block_ioctl+0x319/0x440 blkdev_ioctl+0x80f/0xd70 __se_sys_ioctl+0x219/0x420 __x64_sys_ioctl+0x93/0xe0 x64_sys_call+0x1d6c/0x3ad0 do_syscall_64+0x4c/0xa0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Uninit was created at: __alloc_pages+0x5c0/0xc80 alloc_pages+0xe0e/0x1050 blk_rq_map_user_iov+0x2b77/0x6100 blk_rq_map_user_io+0x2fa/0x4d0 sg_io+0xad6/0xe20 scsi_ioctl+0x2b36/0x3c60 sr_block_ioctl+0x319/0x440 blkdev_ioctl+0x80f/0xd70 __se_sys_ioctl+0x219/0x420 __x64_sys_ioctl+0x93/0xe0 x64_sys_call+0x1d6c/0x3ad0 do_syscall_64+0x4c/0xa0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Bytes 14-15 of 16 are uninitialized Memory access of size 16 starts at ffff88800cbdb000 When processing the last unaligned element of the scatterlist, it is supplemented with missing bytes in the amount of pad_len. These bytes remain uninitialized, which leads to a problem. Add zeroing pad_len bytes of padding by pad_offset offset before increasing its length. This ensures that the DMA does not receive uninitialized data and eliminates the KMSAN warning. In this case, the pages are not located in highmem, but in the general case they might be, so kmap_local_page() is used for mapping. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
Fixes: 40b01b9bbdf5 ("block: update bio according to DMA alignment padding") Co-developed-by: Boris Tonofa <b.tonofa(a)ideco.ru> Signed-off-by: Boris Tonofa <b.tonofa(a)ideco.ru> Signed-off-by: Petr Vaganov <p.vaganov(a)ideco.ru> --- drivers/scsi/scsi_lib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 144c72f0737a..d287e24b6013 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1153,6 +1153,11 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd) if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) { unsigned int pad_len = (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1; + unsigned int pad_offset = last_sg->offset + last_sg->length; + void *vaddr = kmap_local_page(sg_page(last_sg)); + + memset(vaddr + pad_offset, 0, pad_len); + kunmap_local(vaddr); last_sg->length += pad_len; cmd->extra_len += pad_len; -- 2.50.1

1 month

2
2
0 0

[PATCH v5] riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot

by Jingwei Wang

The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF, is determined by an asynchronous kthread. This can create a race condition where the kthread finishes after the vDSO data has already been populated, causing userspace to read stale values. To fix this, a completion-based framework is introduced to robustly synchronize the async probes with the vDSO data population. The waiting function, init_hwprobe_vdso_data(), now blocks on wait_for_completion() until all probes signal they are done. Furthermore, to prevent this potential blocking from impacting boot performance, the initialization is deferred to late_initcall. This is safe as the data is only required by userspace (which starts much later) and moves the synchronization delay off the critical boot path. Reported-by: Tsukasa OI <research_trasio(a)irq.a4lg.com> Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@ir… Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Cc: Palmer Dabbelt <palmer(a)dabbelt.com> Cc: Alexandre Ghiti <alexghiti(a)rivosinc.com> Cc: Olof Johansson <olof(a)lixom.net> Cc: stable(a)vger.kernel.org Signed-off-by: Jingwei Wang <wangjingwei(a)iscas.ac.cn> --- Changes in v5: - Reworked the synchronization logic to a robust "sentinel-count" pattern based on feedback from Alexandre. - Fixed a "multiple definition" linker error for nommu builds by changing the header-file stub functions to `static inline`, as pointed out by Olof. - Updated the commit message to better explain the rationale for moving the vDSO initialization to `late_initcall`. Changes in v4: - Reworked the synchronization mechanism based on feedback from Palmer and Alexandre. - Instead of a post-hoc refresh, this version introduces a robust completion-based framework using an atomic counter to ensure async probes are finished before populating the vDSO. - Moved the vdso data initialization to a late_initcall to avoid impacting boot time. 
Changes in v3: - Retained existing blank line. Changes in v2: - Addressed feedback from Yixun's regarding #ifdef CONFIG_MMU usage. - Updated commit message to provide a high-level summary. - Added Fixes tag for commit e7c9d66e313b. v1: https://lore.kernel.org/linux-riscv/20250521052754.185231-1-wangjingwei@isc… arch/riscv/include/asm/hwprobe.h | 8 +++++++- arch/riscv/kernel/sys_hwprobe.c | 20 +++++++++++++++++++- arch/riscv/kernel/unaligned_access_speed.c | 9 +++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 7fe0a379474ae2c6..3b2888126e659ea1 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -40,5 +40,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair, return pair->value == other_pair->value; } - +#ifdef CONFIG_MMU +void riscv_hwprobe_register_async_probe(void); +void riscv_hwprobe_complete_async_probe(void); +#else +static inline void riscv_hwprobe_register_async_probe(void) {} +static inline void riscv_hwprobe_complete_async_probe(void) {} +#endif #endif diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 0b170e18a2beba57..ee02aeb03e7bd3d8 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -5,6 +5,8 @@ * more details. 
*/ #include <linux/syscalls.h> +#include <linux/completion.h> +#include <linux/atomic.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> @@ -467,6 +469,20 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, #ifdef CONFIG_MMU +static DECLARE_COMPLETION(boot_probes_done); +static atomic_t pending_boot_probes = ATOMIC_INIT(1); + +void riscv_hwprobe_register_async_probe(void) +{ + atomic_inc(&pending_boot_probes); +} + +void riscv_hwprobe_complete_async_probe(void) +{ + if (atomic_dec_and_test(&pending_boot_probes)) + complete(&boot_probes_done); +} + static int __init init_hwprobe_vdso_data(void) { struct vdso_arch_data *avd = vdso_k_arch_data; @@ -474,6 +490,8 @@ static int __init init_hwprobe_vdso_data(void) struct riscv_hwprobe pair; int key; + if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) + wait_for_completion(&boot_probes_done); /* * Initialize vDSO data with the answers for the "all CPUs" case, to * save a syscall in the common case. 
@@ -504,7 +522,7 @@ static int __init init_hwprobe_vdso_data(void) return 0; } -arch_initcall_sync(init_hwprobe_vdso_data); +late_initcall(init_hwprobe_vdso_data); #endif /* CONFIG_MMU */ diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index ae2068425fbcd207..4b8ad2673b0f7470 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); + riscv_hwprobe_complete_async_probe(); return 0; } @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } else if (!check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); + riscv_hwprobe_register_async_probe(); + if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"))) { + pr_warn("Failed to create vec_unalign_check kthread\n"); + riscv_hwprobe_complete_async_probe(); + } } /* -- 2.50.0

1 month

2
3
0 0

[PATCH] comedi: Fix use of uninitialized memory in do_insn_ioctl() and do_insnlist_ioctl()

by Ian Abbott

syzbot reports a KMSAN kernel-infoleak in `do_insn_ioctl()`. A kernel buffer is allocated to hold `insn->n` samples (each of which is an `unsigned int`). For some instruction types, `insn->n` samples are copied back to user-space, unless an error code is being returned. The problem is that not all the instruction handlers that need to return data to userspace fill in the whole `insn->n` samples, so that there is an information leak. There is a similar syzbot report for `do_insnlist_ioctl()`, although it does not have a reproducer for it at the time of writing. One culprit is `insn_rw_emulate_bits()` which is used as the handler for `INSN_READ` or `INSN_WRITE` instructions for subdevices that do not have a specific handler for that instruction, but do have an `INSN_BITS` handler. For `INSN_READ` it only fills in at most 1 sample, so if `insn->n` is greater than 1, the remaining `insn->n - 1` samples copied to userspace will be uninitialized kernel data. Another culprit is `vmk80xx_ai_insn_read()` in the "vmk80xx" driver. It never returns an error, even if it fails to fill the buffer. Fix it in `do_insn_ioctl()` and `do_insnlist_ioctl()` by making sure that uninitialized parts of the allocated buffer are zeroed before handling each instruction. Thanks to Arnaud Lecomte for their fix to `do_insn_ioctl()`. That fix replaced the call to `kmalloc_array()` with `kcalloc()`, but it is not always necessary to clear the whole buffer. Fixes: ed9eccbe8970 ("Staging: add comedi core") Reported-by: syzbot+a5e45f768aab5892da5d(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a5e45f768aab5892da5d Reported-by: syzbot+fb4362a104d45ab09cf9(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fb4362a104d45ab09cf9 Cc: <stable(a)vger.kernel.org> #5.13+ Cc: Arnaud Lecomte <contact(a)arnaud-lcm.com> Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk> --- Cherry picks to fix this for 5.4.y and 5.10.y not yet available. 
--- drivers/comedi/comedi_fops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 23b7178522ae..7e2f2b1a1c36 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -1587,6 +1587,9 @@ static int do_insnlist_ioctl(struct comedi_device *dev, memset(&data[n], 0, (MIN_SAMPLES - n) * sizeof(unsigned int)); } + } else { + memset(data, 0, max_t(unsigned int, n, MIN_SAMPLES) * + sizeof(unsigned int)); } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) @@ -1670,6 +1673,8 @@ static int do_insn_ioctl(struct comedi_device *dev, memset(&data[insn->n], 0, (MIN_SAMPLES - insn->n) * sizeof(unsigned int)); } + } else { + memset(data, 0, n_data * sizeof(unsigned int)); } ret = parse_insn(dev, insn, data, file); if (ret < 0) -- 2.47.2

1 month

1
0
0 0

[stable backport request] x86/ioremap: Use is_ioremap_addr() in iounmap() for 5.15.y

by Arin Sun

Dear Stable Kernel Team and Maintainers, I am writing to request a backport of the following commit from the mainline kernel to the 5.15.y stable branch: Commit: x86/ioremap: Use is_ioremap_addr() in iounmap() ID: 50c6dbdfd16e312382842198a7919341ad480e05 Author: Max Ramanouski Merged in: Linux 6.11-rc1 (approximately August 2024) This commit fixes a bug in the iounmap() function for x86 architectures in kernel versions 5.x. Specifically, the original code uses a check against high_memory: if ((void __force *)addr <= high_memory) return; This can lead to memory leaks on certain x86 servers where ioremap() returns addresses that are not guaranteed to be greater than high_memory, causing the function to return early without properly unmapping the memory. The fix replaces this with is_ioremap_addr(), making the check more reliable: if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr))) return; I have checked the 5.15.y branch logs and did not find this backport. This issue affects production environments, particularly on customer machines where we cannot easily deploy custom kernels. Backporting this to 5.15.y (and possibly other LTS branches like 5.10.y if applicable) would help resolve the memory leak without requiring users to upgrade to 6.x series. Do you have plans to backport this commit? If not, could you please consider it for inclusion in the stable releases? Thank you for your time and efforts in maintaining the stable kernels. Best regards, xin.sun

1 month

2
1
0 0

[PATCH 6.6.y] arm64/cpufeatures/kvm: Add ARMv8.9 FEAT_ECBHB bits in ID_AA64MMFR1 register

by Roy, Patrick

From: Nianyao Tang <tangnianyao(a)huawei.com> [ upstream commit e8cde32f111f7f5681a7bad3ec747e9e697569a9 ] Enable ECBHB bits in ID_AA64MMFR1 register as per ARM DDI 0487K.a specification. When guest OS read ID_AA64MMFR1_EL1, kvm emulate this reg using ftr_id_aa64mmfr1 and always return ID_AA64MMFR1_EL1.ECBHB=0 to guest. It results in guest syscall jump to tramp ventry, which is not needed in implementation with ID_AA64MMFR1_EL1.ECBHB=1. Let's make the guest syscall process the same as the host. This fixes performance regressions introduced by commit 4117975672c4 ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") for guests running on neoverse v2 hardware, which supports ECBHB. Signed-off-by: Nianyao Tang <tangnianyao(a)huawei.com> Link: https://lore.kernel.org/r/20240611122049.2758600-1-tangnianyao@huawei.com Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com> Signed-off-by: Patrick Roy <roypat(a)amazon.co.uk> --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b6d381f743f3..2ce9ef9d924a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -364,6 +364,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), -- 2.50.1

1 month

1
0
0 0

[PATCH] block: Fix bounce check logic in blk_queue_may_bounce()

by Hardeep Sharma

Buffer bouncing is needed only when memory exists above the lowmem region, i.e., when max_low_pfn < max_pfn. The previous check (max_low_pfn >= max_pfn) was inverted and prevented bouncing when it could actually be required. Note that bouncing depends on CONFIG_HIGHMEM, which is typically enabled on 32-bit ARM where not all memory is permanently mapped into the kernel’s lowmem region. Fixes: 9bb33f24abbd0 ("block: refactor the bounce buffering code") Cc: stable(a)vger.kernel.org Signed-off-by: Hardeep Sharma <quic_hardshar(a)quicinc.com> --- block/blk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk.h b/block/blk.h index 67915b04b3c1..f8a1d64be5a2 100644 --- a/block/blk.h +++ b/block/blk.h @@ -383,7 +383,7 @@ static inline bool blk_queue_may_bounce(struct request_queue *q) { return IS_ENABLED(CONFIG_BOUNCE) && q->limits.bounce == BLK_BOUNCE_HIGH && - max_low_pfn >= max_pfn; + max_low_pfn < max_pfn; } static inline struct bio *blk_queue_bounce(struct bio *bio, -- 2.25.1

1 month

1
0
0 0

[PATCH] sprintf.h requires stdarg.h

by Suchit Karunakaran

From: Stephen Rothwell <sfr(a)canb.auug.org.au> In file included from drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c:4: include/linux/sprintf.h:11:54: error: unknown type name 'va_list' 11 | __printf(2, 0) int vsprintf(char *buf, const char *, va_list); | ^~~~~~~ include/linux/sprintf.h:1:1: note: 'va_list' is defined in header '<stdarg.h>'; this is probably fixable by adding '#include <stdarg.h>' Link: https://lkml.kernel.org/r/20250721173754.42865913@canb.auug.org.au Fixes: 39ced19b9e60 ("lib/vsprintf: split out sprintf() and friends") Signed-off-by: Stephen Rothwell <sfr(a)canb.auug.org.au> Cc: Andriy Shevchenko <andriy.shevchenko(a)linux.intel.com> Cc: Herbert Xu <herbert(a)gondor.apana.org.au> Cc: Petr Mladek <pmladek(a)suse.com> Cc: Steven Rostedt <rostedt(a)goodmis.org> Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk> Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Signed-off-by: Suchit Karunakaran <suchitkarunakaran(a)gmail.com> --- include/linux/sprintf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..876130091384 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -4,6 +4,7 @@ #include <linux/compiler_attributes.h> #include <linux/types.h> +#include <linux/stdarg.h> int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); -- 2.39.5

1 month

1
0
0 0

[PATCH v5 1/8] mm/shmem, swap: improve cached mTHP handling and fix potential hang

by Kairui Song

From: Kairui Song <kasong(a)tencent.com> The current swap-in code assumes that, when a swap entry in shmem mapping is order 0, its cached folios (if present) must be order 0 too, which turns out to be not always correct. The problem is shmem_split_large_entry is called before verifying the folio will eventually be swapped in. One possible race is: CPU1 CPU2 shmem_swapin_folio /* swap in of order > 0 swap entry S1 */ folio = swap_cache_get_folio /* folio = NULL */ order = xa_get_order /* order > 0 */ folio = shmem_swap_alloc_folio /* mTHP alloc failure, folio = NULL */ <... Interrupted ...> shmem_swapin_folio /* S1 is swapped in */ shmem_writeout /* S1 is swapped out, folio cached */ shmem_split_large_entry(..., S1) /* S1 is split, but the folio covering it has order > 0 now */ Now any following swapin of S1 will hang: `xa_get_order` returns 0, and folio lookup will return a folio with order > 0. The `xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check will always be true, causing swap-in to return -EEXIST. And this looks fragile. So fix this up by allowing a larger folio to be seen in the swap cache, and checking that the whole shmem mapping range covered by the swapin has the right swap value upon inserting the folio. And drop the redundant tree walks before the insertion. This will actually improve performance, as it avoids two redundant Xarray tree walks in the hot path, and the only side effect is that in the failure path, shmem may redundantly reallocate a few folios causing temporary slight memory pressure. It is also worth noting that it may seem the order and value check before inserting might help reduce lock contention, which is not true. The swap cache layer ensures raced swapin will either see a swap cache folio or fail to do a swapin (we have SWAP_HAS_CACHE bit even if swap cache is bypassed), so holding the folio lock and checking the folio flag is already good enough for avoiding the lock contention. 
The chance that a folio passes the swap entry value check but the shmem mapping slot has changed should be very low. Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Kairui Song <kasong(a)tencent.com> Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com> Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com> Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com> Cc: <stable(a)vger.kernel.org> --- mm/shmem.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 334b7b4a61a0..e3c9a1365ff4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struct folio *folio, pgoff_t index, void *expected, gfp_t gfp) { XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); - long nr = folio_nr_pages(folio); + unsigned long nr = folio_nr_pages(folio); + swp_entry_t iter, swap; + void *entry; VM_BUG_ON_FOLIO(index != round_down(index, nr), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); @@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struct folio *folio, gfp &= GFP_RECLAIM_MASK; folio_throttle_swaprate(folio, gfp); + swap = iter = radix_to_swp_entry(expected); do { xas_lock_irq(&xas); - if (expected != xas_find_conflict(&xas)) { - xas_set_err(&xas, -EEXIST); - goto unlock; + xas_for_each_conflict(&xas, entry) { + /* + * The range must either be empty, or filled with + * expected swap entries. Shmem swap entries are never + * partially freed without split of both entry and + * folio, so there shouldn't be any holes. 
+ */ + if (!expected || entry != swp_to_radix_entry(iter)) { + xas_set_err(&xas, -EEXIST); + goto unlock; + } + iter.val += 1 << xas_get_order(&xas); } - if (expected && xas_find_conflict(&xas)) { + if (expected && iter.val - nr != swap.val) { xas_set_err(&xas, -EEXIST); goto unlock; } @@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, error = -ENOMEM; goto failed; } - } else if (order != folio_order(folio)) { + } else if (order > folio_order(folio)) { /* * Swap readahead may swap in order 0 folios into swapcache * asynchronously, while the shmem mapping can still stores @@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } + } else if (order < folio_order(folio)) { + swap.val = round_down(swap.val, 1 << folio_order(folio)); } alloced: /* We have to do this with folio locked to prevent races */ folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || - folio->swap.val != swap.val || - !shmem_confirm_swap(mapping, index, swap) || - xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { + folio->swap.val != swap.val) { error = -EEXIST; goto unlock; } -- 2.50.0

1 month

2
4
0 0

[merged mm-stable] mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/damon/ops-common: ignore migration request to invalid nodes has been removed from the -mm tree. Its filename was mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch This patch was dropped because it was merged into the mm-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: SeongJae Park <sj(a)kernel.org> Subject: mm/damon/ops-common: ignore migration request to invalid nodes Date: Sun, 20 Jul 2025 11:58:22 -0700 damon_migrate_pages() tries migration even if the target node is invalid. If users mistakenly make such invalid requests via DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen. [ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48 [ 7831.884160] #PF: supervisor read access in kernel mode [ 7831.884681] #PF: error_code(0x0000) - not-present page [ 7831.885203] PGD 0 P4D 0 [ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI [ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary) [ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014 [ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137) [...] [ 7831.895953] Call Trace: [ 7831.896195] <TASK> [ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192) [ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851) [ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137) [ 7831.897735] migrate_pages (mm/migrate.c:2078) [ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137) [ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354) [ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405) [...] Add a target node validity check in damon_migrate_pages(). 
The validity check is stolen from that of do_pages_move(), which is being used for the move_pages() system call. Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x] Signed-off-by: SeongJae Park <sj(a)kernel.org> Reviewed-by: Joshua Hahn <joshua.hahnjy(a)gmail.com> Cc: Honggyu Kim <honggyu.kim(a)sk.com> Cc: Hyeongtak Ji <hyeongtak.ji(a)sk.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/damon/ops-common.c | 4 ++++ 1 file changed, 4 insertions(+) --- a/mm/damon/ops-common.c~mm-damon-ops-common-ignore-migration-request-to-invalid-nodes +++ a/mm/damon/ops-common.c @@ -383,6 +383,10 @@ unsigned long damon_migrate_pages(struct if (list_empty(folio_list)) return nr_migrated; + if (target_nid < 0 || target_nid >= MAX_NUMNODES || + !node_state(target_nid, N_MEMORY)) + return nr_migrated; + noreclaim_flag = memalloc_noreclaim_save(); nid = folio_nid(lru_to_folio(folio_list)); _ Patches currently in -mm which might be from sj(a)kernel.org are selftests-damon-sysfspy-stop-damon-for-dumping-failures.patch selftests-damon-_damon_sysfs-support-damos-watermarks-setup.patch selftests-damon-_damon_sysfs-support-damos-filters-setup.patch selftests-damon-_damon_sysfs-support-monitoring-intervals-goal-setup.patch selftests-damon-_damon_sysfs-support-damos-quota-weights-setup.patch selftests-damon-_damon_sysfs-support-damos-quota-goal-nid-setup.patch selftests-damon-_damon_sysfs-support-damos-action-dests-setup.patch selftests-damon-_damon_sysfs-support-damos-target_nid-setup.patch selftests-damon-_damon_sysfs-use-232-1-as-max-nr_accesses-and-age.patch selftests-damon-drgn_dump_damon_status-dump-damos-migrate_dests.patch selftests-damon-drgn_dump_damon_status-dump-ctx-opsid.patch selftests-damon-drgn_dump_damon_status-dump-damos-filters.patch selftests-damon-sysfspy-generalize-damos-watermarks-commit-assertion.patch 
selftests-damon-sysfspy-generalize-damosquota-commit-assertion.patch selftests-damon-sysfspy-test-quota-goal-commitment.patch selftests-damon-sysfspy-test-damos-destinations-commitment.patch selftests-damon-sysfspy-generalize-damos-scheme-commit-assertion.patch selftests-damon-sysfspy-test-damos-filters-commitment.patch selftests-damon-sysfspy-generalize-damos-schemes-commit-assertion.patch selftests-damon-sysfspy-generalize-monitoring-attributes-commit-assertion.patch selftests-damon-sysfspy-generalize-damon-context-commit-assertion.patch selftests-damon-sysfspy-test-non-default-parameters-runtime-commit.patch selftests-damon-sysfspy-test-runtime-reduction-of-damon-parameters.patch

1 month

1
0
0 0

[merged mm-stable] mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: swap: fix potential buffer overflow in setup_clusters() has been removed from the -mm tree. Its filename was mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch This patch was dropped because it was merged into the mm-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Kemeng Shi <shikemeng(a)huaweicloud.com> Subject: mm: swap: fix potential buffer overflow in setup_clusters() Date: Thu, 22 May 2025 20:25:53 +0800 In setup_swap_map(), we only ensure badpages are in range (0, last_page]. As maxpages might be < last_page, setup_clusters() will encounter a buffer overflow when a badpage is >= maxpages. Only call inc_cluster_info_page() for badpage which is < maxpages to fix the issue. Link: https://lkml.kernel.org/r/20250522122554.12209-4-shikemeng@huaweicloud.com Fixes: b843786b0bd0 ("mm: swapfile: fix SSD detection with swapfile on btrfs") Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com> Reviewed-by: Baoquan He <bhe(a)redhat.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kairui Song <kasong(a)tencent.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/swapfile.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) --- a/mm/swapfile.c~mm-swap-fix-potensial-buffer-overflow-in-setup_clusters +++ a/mm/swapfile.c @@ -3208,9 +3208,13 @@ static struct swap_cluster_info *setup_c * and the EOF part of the last cluster. 
*/ inc_cluster_info_page(si, cluster_info, 0); - for (i = 0; i < swap_header->info.nr_badpages; i++) - inc_cluster_info_page(si, cluster_info, - swap_header->info.badpages[i]); + for (i = 0; i < swap_header->info.nr_badpages; i++) { + unsigned int page_nr = swap_header->info.badpages[i]; + + if (page_nr >= maxpages) + continue; + inc_cluster_info_page(si, cluster_info, page_nr); + } for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) inc_cluster_info_page(si, cluster_info, i); _ Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are

1 month

1
0
0 0

[merged mm-stable] mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop has been removed from the -mm tree. Its filename was mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch This patch was dropped because it was merged into the mm-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Kemeng Shi <shikemeng(a)huaweicloud.com> Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop Date: Thu, 22 May 2025 20:25:52 +0800 We use maxpages from read_swap_header() to initialize swap_info_struct, however the maxpages might be reduced in setup_swap_extents() and the si->max is assigned with the reduced maxpages from the setup_swap_extents(). Obviously, this could lead to memory waste as we allocated memory based on the larger maxpages; besides, this could lead to a potential deadloop as follows: 1) When calling setup_clusters() with the larger maxpages, unavailable pages within range [si->max, larger maxpages) are not accounted with inc_cluster_info_page(). As a result, these pages are assumed available but can not be allocated. The cluster containing these pages can be moved to the frag_clusters list after all its available pages were allocated. 2) When the cluster mentioned in 1) is the only cluster in the frag_clusters list, cluster_alloc_swap_entry() assumes order 0 allocation will never fail and will enter a deadloop by repeatedly trying to allocate a page from the only cluster in frag_clusters, which contains no actually available page. Call setup_swap_extents() to get the final maxpages before swap_info_struct initialization to fix the issue. After this change, span will include badblocks and will become a larger value, which I think is the correct value: In summary, there are two kinds of swapfile_activate operations. 1. 
Filesystem style: Treat all blocks as logically contiguous and find usable physical extents in the logical range. In this way, si->pages will be the actual usable physical blocks and span will be "1 + highest_block - lowest_block". 2. Block device style: Treat all blocks as physically contiguous and only one single extent is added. In this way, si->pages will be si->max and span will be "si->pages - 1". Actually, si->pages and si->max are only used in block device style and the span value is set with si->pages. As a result, the span value in block device style will become a larger value as you mentioned. I think the larger value is correct based on: 1. Span value in filesystem style is "1 + highest_block - lowest_block", which is the range covering all possible physical blocks including the badblocks. 2. For block device style, si->pages is the actual usable block number and is already in pr_info. The original span value before this patch also refers to the usable block number, which is redundant in pr_info. [shikemeng(a)huaweicloud.com: ensure si->pages == si->max - 1 after setup_swap_extents()] Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com Link: https://lkml.kernel.org/r/20250718065139.61989-1-shikemeng@huaweicloud.com Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com Fixes: 661383c6111a ("mm: swap: relaim the cached parts that got scanned") Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com> Reviewed-by: Baoquan He <bhe(a)redhat.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kairui Song <kasong(a)tencent.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/swapfile.c | 53 +++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) --- a/mm/swapfile.c~mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop +++ a/mm/swapfile.c @@ -3141,43 +3141,30 @@ static unsigned long read_swap_header(st return maxpages; } -static int 
setup_swap_map_and_extents(struct swap_info_struct *si, - union swap_header *swap_header, - unsigned char *swap_map, - unsigned long maxpages, - sector_t *span) +static int setup_swap_map(struct swap_info_struct *si, + union swap_header *swap_header, + unsigned char *swap_map, + unsigned long maxpages) { - unsigned int nr_good_pages; unsigned long i; - int nr_extents; - - nr_good_pages = maxpages - 1; /* omit header page */ + swap_map[0] = SWAP_MAP_BAD; /* omit header page */ for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; if (page_nr == 0 || page_nr > swap_header->info.last_page) return -EINVAL; if (page_nr < maxpages) { swap_map[page_nr] = SWAP_MAP_BAD; - nr_good_pages--; + si->pages--; } } - if (nr_good_pages) { - swap_map[0] = SWAP_MAP_BAD; - si->max = maxpages; - si->pages = nr_good_pages; - nr_extents = setup_swap_extents(si, span); - if (nr_extents < 0) - return nr_extents; - nr_good_pages = si->pages; - } - if (!nr_good_pages) { + if (!si->pages) { pr_warn("Empty swap-file\n"); return -EINVAL; } - return nr_extents; + return 0; } #define SWAP_CLUSTER_INFO_COLS \ @@ -3217,7 +3204,7 @@ static struct swap_cluster_info *setup_c * Mark unusable pages as unavailable. The clusters aren't * marked free yet, so no list operations are involved yet. * - * See setup_swap_map_and_extents(): header page, bad pages, + * See setup_swap_map(): header page, bad pages, * and the EOF part of the last cluster. 
*/ inc_cluster_info_page(si, cluster_info, 0); @@ -3363,6 +3350,21 @@ SYSCALL_DEFINE2(swapon, const char __use goto bad_swap_unlock_inode; } + si->max = maxpages; + si->pages = maxpages - 1; + nr_extents = setup_swap_extents(si, &span); + if (nr_extents < 0) { + error = nr_extents; + goto bad_swap_unlock_inode; + } + if (si->pages != si->max - 1) { + pr_err("swap:%u != (max:%u - 1)\n", si->pages, si->max); + error = -EINVAL; + goto bad_swap_unlock_inode; + } + + maxpages = si->max; + /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { @@ -3374,12 +3376,9 @@ SYSCALL_DEFINE2(swapon, const char __use if (error) goto bad_swap_unlock_inode; - nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map, - maxpages, &span); - if (unlikely(nr_extents < 0)) { - error = nr_extents; + error = setup_swap_map(si, swap_header, swap_map, maxpages); + if (error) goto bad_swap_unlock_inode; - } /* * Use kvmalloc_array instead of bitmap_zalloc as the allocation order might _ Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are

1 month

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror