 
            From: Dominique Martinet asmadeus@codewreck.org
[ Upstream commit 38d222b3163f7b7d737e5d999ffc890a12870e36 ]
It's possible for v9fs_fid_find "find by dentry" branch to not turn up anything despite having an entry set (because e.g. uid doesn't match), in which case the calling code will generally make an extra lookup to the server.
In this case we might have had better luck looking by inode, so fall back to look up by inode if we have one and the lookup by dentry failed.
Message-Id: 20240523210024.1214386-1-asmadeus@codewreck.org Reviewed-by: Christian Schoenebeck linux_oss@crudebyte.com Signed-off-by: Dominique Martinet asmadeus@codewreck.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/9p/fid.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index de009a33e0e26..f84412290a30c 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -131,10 +131,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) } } spin_unlock(&dentry->d_lock); - } else { - if (dentry->d_inode) - ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any); } + if (!ret && dentry->d_inode) + ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any);
return ret; }
 
            From: Pedro Falcato pedro.falcato@gmail.com
[ Upstream commit 79efebae4afc2221fa814c3cae001bede66ab259 ]
In the spirit of [1], avoid creating multiple slab caches with the same name. Instead, add the dev_name into the mix.
[1]: https://lore.kernel.org/all/20240807090746.2146479-1-pedro.falcato@gmail.com...
Signed-off-by: Pedro Falcato pedro.falcato@gmail.com Reported-by: syzbot+3c5d43e97993e1fa612b@syzkaller.appspotmail.com Message-ID: 20240807094725.2193423-1-pedro.falcato@gmail.com Signed-off-by: Dominique Martinet asmadeus@codewreck.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/9p/client.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/net/9p/client.c b/net/9p/client.c index b05f73c291b4b..e7ea6c5c7463d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -978,6 +978,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) int err; struct p9_client *clnt; char *client_id; + char *cache_name;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) @@ -1034,15 +1035,22 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto close_trans;
+ cache_name = kasprintf(GFP_KERNEL, "9p-fcall-cache-%s", dev_name); + if (!cache_name) { + err = -ENOMEM; + goto close_trans; + } + /* P9_HDRSZ + 4 is the smallest packet header we can have that is * followed by data accessed from userspace by read */ clnt->fcall_cache = - kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize, + kmem_cache_create_usercopy(cache_name, clnt->msize, 0, 0, P9_HDRSZ + 4, clnt->msize - (P9_HDRSZ + 4), NULL);
+ kfree(cache_name); return clnt;
close_trans:
 
            From: Eduard Zingerman eddyz87@gmail.com
[ Upstream commit a41b3828ec056a631ad22413d4560017fed5c3bd ]
This test was added because of a bug in verifier.c:sync_linked_regs(), upon range propagation it destroyed subreg_def marks for registers. The test is written in a way to return an upper half of a register that is affected by range propagation and must have it's subreg_def preserved. This gives a return value of 0 and leads to undefined return value if subreg_def mark is not preserved.
Signed-off-by: Eduard Zingerman eddyz87@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Acked-by: Daniel Borkmann daniel@iogearbox.net Link: https://lore.kernel.org/bpf/20240924210844.1758441-2-eddyz87@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/bpf/progs/verifier_scalar_ids.c | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 13b29a7faa71a..d24d3a36ec144 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -656,4 +656,71 @@ __naked void two_old_ids_one_cur_id(void) : __clobber_all); }
+SEC("socket") +/* Note the flag, see verifier.c:opt_subreg_zext_lo32_rnd_hi32() */ +__flag(BPF_F_TEST_RND_HI32) +__success +/* This test was added because of a bug in verifier.c:sync_linked_regs(), + * upon range propagation it destroyed subreg_def marks for registers. + * The subreg_def mark is used to decide whether zero extension instructions + * are needed when register is read. When BPF_F_TEST_RND_HI32 is set it + * also causes generation of statements to randomize upper halves of + * read registers. + * + * The test is written in a way to return an upper half of a register + * that is affected by range propagation and must have it's subreg_def + * preserved. This gives a return value of 0 and leads to undefined + * return value if subreg_def mark is not preserved. + */ +__retval(0) +/* Check that verifier believes r1/r0 are zero at exit */ +__log_level(2) +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +__msg("from 3 to 4") +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +/* Verify that statements to randomize upper half of r1 had not been + * generated. + */ +__xlated("call unknown") +__xlated("r0 &= 2147483647") +__xlated("w1 = w0") +/* This is how disasm.c prints BPF_ZEXT_REG at the moment, x86 and arm + * are the only CI archs that do not need zero extension for subregs. + */ +#if !defined(__TARGET_ARCH_x86) && !defined(__TARGET_ARCH_arm64) +__xlated("w1 = w1") +#endif +__xlated("if w0 < 0xa goto pc+0") +__xlated("r1 >>= 32") +__xlated("r0 = r1") +__xlated("exit") +__naked void linked_regs_and_subreg_def(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* make sure r0 is in 32-bit range, otherwise w1 = w0 won't + * assign same IDs to registers. + */ + "r0 &= 0x7fffffff;" + /* link w1 and w0 via ID */ + "w1 = w0;" + /* 'if' statement propagates range info from w0 to w1, + * but should not affect w1->subreg_def property. + */ + "if w0 < 10 goto +0;" + /* r1 is read here, on archs that require subreg zero + * extension this would cause zext patch generation. + */ + "r1 >>= 32;" + "r0 = r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL";
 
            From: Sergey Matsievskiy matsievskiysv@gmail.com
[ Upstream commit 9e9c4666abb5bb444dac37e2d7eb5250c8d52a45 ]
Controllers, supported by this driver, have two sets of registers:
* (main) interrupt registers control peripheral interrupt sources.
* device interrupt registers configure per-device (network interface) interrupts and act as an extra stage before the main interrupt registers.
In the driver unmask code, device trigger registers are used in the mask calculation of the main interrupt sticky register, mixing two kinds of registers.
Use the main interrupt trigger register instead.
Signed-off-by: Sergey Matsievskiy matsievskiysv@gmail.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/all/20240925184416.54204-2-matsievskiysv@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-mscc-ocelot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c index 4d0c3532dbe73..c19ab379e8c5e 100644 --- a/drivers/irqchip/irq-mscc-ocelot.c +++ b/drivers/irqchip/irq-mscc-ocelot.c @@ -37,7 +37,7 @@ static struct chip_props ocelot_props = { .reg_off_ena_clr = 0x1c, .reg_off_ena_set = 0x20, .reg_off_ident = 0x38, - .reg_off_trigger = 0x5c, + .reg_off_trigger = 0x4, .n_irq = 24, };
@@ -70,7 +70,7 @@ static struct chip_props jaguar2_props = { .reg_off_ena_clr = 0x1c, .reg_off_ena_set = 0x20, .reg_off_ident = 0x38, - .reg_off_trigger = 0x5c, + .reg_off_trigger = 0x4, .n_irq = 29, };
 
            From: Hannes Reinecke hare@suse.de
[ Upstream commit 782373ba27660ba7d330208cf5509ece6feb4545 ]
Commit 76d54bf20cdc ("nvme-tcp: don't access released socket during error recovery") added a mutex_lock() call for the queue->queue_lock in nvme_tcp_get_address(). However, the mutex_lock() races with mutex_destroy() in nvme_tcp_free_queue(), and causes the WARN below.
DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 3 PID: 34077 at kernel/locking/mutex.c:587 __mutex_lock+0xcf0/0x1220 Modules linked in: nvmet_tcp nvmet nvme_tcp nvme_fabrics iw_cm ib_cm ib_core pktcdvd nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables qrtr sunrpc ppdev 9pnet_virtio 9pnet pcspkr netfs parport_pc parport e1000 i2c_piix4 i2c_smbus loop fuse nfnetlink zram bochs drm_vram_helper drm_ttm_helper ttm drm_kms_helper xfs drm sym53c8xx floppy nvme scsi_transport_spi nvme_core nvme_auth serio_raw ata_generic pata_acpi dm_multipath qemu_fw_cfg [last unloaded: ib_uverbs] CPU: 3 UID: 0 PID: 34077 Comm: udisksd Not tainted 6.11.0-rc7 #319 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 RIP: 0010:__mutex_lock+0xcf0/0x1220 Code: 08 84 d2 0f 85 c8 04 00 00 8b 15 ef b6 c8 01 85 d2 0f 85 78 f4 ff ff 48 c7 c6 20 93 ee af 48 c7 c7 60 91 ee af e8 f0 a7 6d fd <0f> 0b e9 5e f4 ff ff 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 RSP: 0018:ffff88811305f760 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88812c652058 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 RBP: ffff88811305f8b0 R08: 0000000000000001 R09: ffffed1075c36341 R10: ffff8883ae1b1a0b R11: 0000000000010498 R12: 0000000000000000 R13: 0000000000000000 R14: dffffc0000000000 R15: ffff88812c652058 FS: 00007f9713ae4980(0000) GS:ffff8883ae180000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcd78483c7c CR3: 0000000122c38000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? __warn.cold+0x5b/0x1af ? __mutex_lock+0xcf0/0x1220 ? report_bug+0x1ec/0x390 ? handle_bug+0x3c/0x80 ? exc_invalid_op+0x13/0x40 ? asm_exc_invalid_op+0x16/0x20 ? __mutex_lock+0xcf0/0x1220 ? nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] ? __pfx___mutex_lock+0x10/0x10 ? __lock_acquire+0xd6a/0x59e0 ? nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] ? __pfx_nvme_tcp_get_address+0x10/0x10 [nvme_tcp] nvme_sysfs_show_address+0x81/0xc0 [nvme_core] dev_attr_show+0x42/0x80 ? __asan_memset+0x1f/0x40 sysfs_kf_seq_show+0x1f0/0x370 seq_read_iter+0x2cb/0x1130 ? rw_verify_area+0x3b1/0x590 ? __mutex_lock+0x433/0x1220 vfs_read+0x6a6/0xa20 ? lockdep_hardirqs_on+0x78/0x100 ? __pfx_vfs_read+0x10/0x10 ksys_read+0xf7/0x1d0 ? __pfx_ksys_read+0x10/0x10 ? __x64_sys_openat+0x105/0x1d0 do_syscall_64+0x93/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? __pfx_ksys_read+0x10/0x10 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? do_syscall_64+0x9f/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f9713f55cfa Code: 55 48 89 e5 48 83 ec 20 48 89 55 e8 48 89 75 f0 89 7d f8 e8 e8 74 f8 ff 48 8b 55 e8 48 8b 75 f0 41 89 c0 8b 7d f8 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 2e 44 89 c7 48 89 45 f8 e8 42 75 f8 ff 48 8b RSP: 002b:00007ffd7f512e70 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000055c38f316859 RCX: 00007f9713f55cfa RDX: 0000000000000fff RSI: 00007ffd7f512eb0 RDI: 0000000000000011 RBP: 00007ffd7f512e90 R08: 0000000000000000 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000055c38f317148 R13: 0000000000000000 R14: 00007f96f4004f30 R15: 000055c3b6b623c0 </TASK>
The WARN is observed when the blktests test case nvme/014 is repeated with tcp transport. It is rare, and 200 times repeat is required to recreate in some test environments.
To avoid the WARN, check the NVME_TCP_Q_LIVE flag before locking queue->queue_lock. The flag is cleared long time before the lock gets destroyed.
Signed-off-by: Hannes Reinecke hare@suse.de Signed-off-by: Shin'ichiro Kawasaki shinichiro.kawasaki@wdc.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f1d62d74426f0..be04c5f3856d2 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2444,10 +2444,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len = nvmf_get_address(ctrl, buf, size);
+ if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) + return len; + mutex_lock(&queue->queue_lock);
- if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) - goto done; ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); if (ret > 0) { if (len > 0) @@ -2455,7 +2456,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", (len) ? "," : "", &src_addr); } -done: + mutex_unlock(&queue->queue_lock);
return len;
 
            From: SurajSonawane2415 surajsonawane0215@gmail.com
[ Upstream commit b402328a24ee7193a8ab84277c0c90ae16768126 ]
elevator_get_default() and elv_support_iosched() both check for whether or not q->tag_set is non-NULL, however it's not possible for them to be NULL. This messes up some static checkers, as the checking of tag_set isn't consistent.
Remove the checks, which both simplifies the logic and avoids checker errors.
Signed-off-by: SurajSonawane2415 surajsonawane0215@gmail.com Link: https://lore.kernel.org/r/20241007111416.13814-1-surajsonawane0215@gmail.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/elevator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c index 5ff093cb3cf8f..ba072d8f660e6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -558,7 +558,7 @@ EXPORT_SYMBOL_GPL(elv_unregister); static inline bool elv_support_iosched(struct request_queue *q) { if (!queue_is_mq(q) || - (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) + (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) return false; return true; } @@ -569,7 +569,7 @@ static inline bool elv_support_iosched(struct request_queue *q) */ static struct elevator_type *elevator_get_default(struct request_queue *q) { - if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) + if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL;
if (q->nr_hw_queues != 1 &&
 
            From: Stefan Blum stefanblum2004@gmail.com
[ Upstream commit 1a5cbb526ec4b885177d06a8bc04f38da7dbb1d9 ]
By default the track point does not work on the Asus Expertbook B2402FVA.
From libinput record i got the ID of the track point device: evdev: # Name: ASUE1201:00 04F3:32AE # ID: bus 0x18 vendor 0x4f3 product 0x32ae version 0x100
I found that the track point is functional, when i set the MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU class for the reported device.
Signed-off-by: Stefan Blum stefan.blum@gmail.com Signed-off-by: Jiri Kosina jkosina@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e7199ae2e3d91..7584e5a3aafeb 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2020,6 +2020,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x3148) },
+ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ELAN, 0x32ae) }, + /* Elitegroup panel */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP,
 
            From: WangYuli wangyuli@uniontech.com
[ Upstream commit 7a5ab8071114344f62a8b1e64ed3452a77257d76 ]
The behavior of HONOR MagicBook Art 14 touchpad is not consistent after reboots, as sometimes it reports itself as a touchpad, and sometimes as a mouse.
Similarly to GLO-GXXX it is possible to call MT_QUIRK_FORCE_GET_FEATURE as a workaround to force set feature in mt_set_input_mode() for such special touchpad device.
[jkosina@suse.com: reword changelog a little bit] Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/1040 Signed-off-by: Wentao Guan guanwentao@uniontech.com Signed-off-by: WangYuli wangyuli@uniontech.com Reviewed-by: Benjamin Tissoires bentiss@kernel.org Signed-off-by: Jiri Kosina jkosina@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7584e5a3aafeb..c2d79b2d6cdd2 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2093,6 +2093,11 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 0x347d, 0x7853) },
+ /* HONOR MagicBook Art 14 touchpad */ + { .driver_data = MT_CLS_VTL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + 0x35cc, 0x0104) }, + /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK,
 
            From: Robin Murphy robin.murphy@arm.com
[ Upstream commit 0dfe314cdd0d378f96bb9c6bdc05c8120f48606d ]
CPRE workarounds are implicated in at least 5 MMU-500 errata, some of which remain unfixed. The comment and warning message have proven to be unhelpfully misleading about this scope, so reword them to get the point across with less risk of going out of date or confusing users.
Signed-off-by: Robin Murphy robin.murphy@arm.com Link: https://lore.kernel.org/r/dfa82171b5248ad7cf1f25592101a6eec36b8c9a.172840087... Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 9dc772f2cbb27..99030e6b16e7a 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -130,7 +130,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
/* * Disable MMU-500's not-particularly-beneficial next-page - * prefetcher for the sake of errata #841119 and #826419. + * prefetcher for the sake of at least 5 known errata. */ for (i = 0; i < smmu->num_context_banks; ++i) { reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); @@ -138,7 +138,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); if (reg & ARM_MMU500_ACTLR_CPRE) - dev_warn_once(smmu->dev, "Failed to disable prefetcher [errata #841119 and #826419], check ACR.CACHE_LOCK\n"); + dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n"); }
return 0;
 
            From: Greg Joyce gjoyce@linux.ibm.com
[ Upstream commit 0ce96a6708f34280a536263ee5c67e20c433dcce ]
Disable NVME_CC_CRIME so that CSTS.RDY indicates that the media is ready and able to handle commands without returning NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY.
Signed-off-by: Greg Joyce gjoyce@linux.ibm.com Reviewed-by: Nilay Shroff nilay@linux.ibm.com Tested-by: Nilay Shroff nilay@linux.ibm.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 82509f3679373..e25206c7de80c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2250,8 +2250,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) else ctrl->ctrl_config = NVME_CC_CSS_NVM;
- if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS) - ctrl->ctrl_config |= NVME_CC_CRIME; + /* + * Setting CRIME results in CSTS.RDY before the media is ready. This + * makes it possible for media related commands to return the error + * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is + * restructured to handle retries, disable CC.CRIME. + */ + ctrl->ctrl_config &= ~NVME_CC_CRIME;
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; @@ -2286,10 +2291,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) * devices are known to get this wrong. Use the larger of the * two values. */ - if (ctrl->ctrl_config & NVME_CC_CRIME) - ready_timeout = NVME_CRTO_CRIMT(crto); - else - ready_timeout = NVME_CRTO_CRWMT(crto); + ready_timeout = NVME_CRTO_CRWMT(crto);
if (ready_timeout < timeout) dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",
 
            From: Rik van Riel riel@surriel.com
[ Upstream commit 434247637c66e1be2bc71a9987d4c3f0d8672387 ]
The kzmalloc call in bpf_check can fail when memory is very fragmented, which in turn can lead to an OOM kill.
Use kvzmalloc to fall back to vmalloc when memory is too fragmented to allocate an order 3 sized bpf verifier environment.
Admittedly this is not a very common case, and only happens on systems where memory has already been squeezed close to the limit, but this does not seem like much of a hot path, and it's a simple enough fix.
Signed-off-by: Rik van Riel riel@surriel.com Reviewed-by: Shakeel Butt shakeel.butt@linux.dev Link: https://lore.kernel.org/r/20241008170735.16766766@imladris.surriel.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3032a464d31bb..50999c7b3b0e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20226,7 +20226,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM;
@@ -20446,6 +20446,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); err_free_env: - kfree(env); + kvfree(env); return ret; }
 
            From: Herbert Xu herbert@gondor.apana.org.au
[ Upstream commit b81e286ba154a4e0f01a94d99179a97f4ba3e396 ]
As algorithm testing is carried out without holding the main crypto lock, it is always possible for the algorithm to go away during the test.
So before crypto_alg_tested updates the status of the tested alg, it checks whether it's still on the list of all algorithms. This is inaccurate because it may be off the main list but still on the list of algorithms to be removed.
Updating the algorithm status is safe per se as the larval still holds a reference to it. However, killing spawns of other algorithms that are of lower priority is clearly a deficiency as it adds unnecessary churn.
Fix the test by checking whether the algorithm is dead.
Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/algapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crypto/algapi.c b/crypto/algapi.c index 85bc279b4233f..b3a6086042530 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -396,7 +396,7 @@ void crypto_alg_tested(const char *name, int err) q->cra_flags |= CRYPTO_ALG_DEAD; alg = test->adult;
- if (list_empty(&alg->cra_list)) + if (crypto_is_dead(alg)) goto complete;
if (err == -ECANCELED)
 
            From: Herbert Xu herbert@gondor.apana.org.au
[ Upstream commit e845d2399a00f866f287e0cefbd4fc7d8ef0d2f7 ]
Disable cesa hash algorithms by lowering the priority because they appear to be broken when invoked in parallel. This allows them to still be tested for debugging purposes.
Reported-by: Klaus Kudielka klaus.kudielka@gmail.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/crypto/marvell/cesa/hash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index 8d84ad45571c7..f150861ceaf69 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -947,7 +947,7 @@ struct ahash_alg mv_md5_alg = { .base = { .cra_name = "md5", .cra_driver_name = "mv-md5", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1018,7 +1018,7 @@ struct ahash_alg mv_sha1_alg = { .base = { .cra_name = "sha1", .cra_driver_name = "mv-sha1", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1092,7 +1092,7 @@ struct ahash_alg mv_sha256_alg = { .base = { .cra_name = "sha256", .cra_driver_name = "mv-sha256", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1302,7 +1302,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .base = { .cra_name = "hmac(md5)", .cra_driver_name = "mv-hmac-md5", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1373,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .base = { .cra_name = "hmac(sha1)", .cra_driver_name = "mv-hmac-sha1", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1444,7 +1444,7 @@ struct ahash_alg mv_ahmac_sha256_alg = { .base = { .cra_name = "hmac(sha256)", .cra_driver_name = "mv-hmac-sha256", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY,
 
            From: Julian Vetter jvetter@kalrayinc.com
[ Upstream commit ad6639f143a0b42d7fb110ad14f5949f7c218890 ]
When building for the UM arch and neither INDIRECT_IOMEM=y, nor HAS_IOMEM=y is selected, it will fall back to the implementations from asm-generic/io.h for IO memcpy. But these fall-back functions just do a memcpy. So, instead of depending on UML, add dependency on 'HAS_IOMEM || INDIRECT_IOMEM'.
Reviewed-by: Yann Sionneau ysionneau@kalrayinc.com Signed-off-by: Julian Vetter jvetter@kalrayinc.com Link: https://patch.msgid.link/20241010124601.700528-1-jvetter@kalrayinc.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/Kconfig b/sound/Kconfig index 4c036a9a420ab..8b40205394fe0 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig SOUND tristate "Sound card support" - depends on HAS_IOMEM || UML + depends on HAS_IOMEM || INDIRECT_IOMEM help If you have a sound card in your computer, i.e. if it can say more than an occasional beep, say Y.
 
            From: Ian Forbes ian.forbes@broadcom.com
[ Upstream commit 28a5dfd4f615539fb22fb6d5c219c199c14e6eb6 ]
Currently the array size is only limited by the largest kmalloc size which is incorrect. This change will also return a more specific error message than ENOMEM to userspace.
Signed-off-by: Ian Forbes ian.forbes@broadcom.com Reviewed-by: Zack Rusin zack.rusin@broadcom.com Reviewed-by: Martin Krastev martin.krastev@broadcom.com Signed-off-by: Zack Rusin zack.rusin@broadcom.com Link: https://patchwork.freedesktop.org/patch/msgid/20240808200634.1074083-1-ian.f... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 +++- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 3 --- 3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index ac3d7ff3f5bb9..def98d868deb4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -61,7 +61,7 @@ #define VMWGFX_DRIVER_MINOR 20 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) -#define VMWGFX_MAX_DISPLAYS 16 +#define VMWGFX_NUM_DISPLAY_UNITS 8 #define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768
#define VMWGFX_MIN_INITIAL_WIDTH 1280 @@ -81,7 +81,7 @@ #define VMWGFX_NUM_GB_CONTEXT 256 #define VMWGFX_NUM_GB_SHADER 20000 #define VMWGFX_NUM_GB_SURFACE 32768 -#define VMWGFX_NUM_GB_SCREEN_TARGET VMWGFX_MAX_DISPLAYS +#define VMWGFX_NUM_GB_SCREEN_TARGET VMWGFX_NUM_DISPLAY_UNITS #define VMWGFX_NUM_DXCONTEXT 256 #define VMWGFX_NUM_DXQUERY 512 #define VMWGFX_NUM_MOB (VMWGFX_NUM_GB_CONTEXT +\ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 08f2470edab27..823ed3d9bff2b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2326,7 +2326,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, struct drm_mode_config *mode_config = &dev->mode_config; struct drm_vmw_update_layout_arg *arg = (struct drm_vmw_update_layout_arg *)data; - void __user *user_rects; + const void __user *user_rects; struct drm_vmw_rect *rects; struct drm_rect *drm_rects; unsigned rects_size; @@ -2338,6 +2338,8 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, VMWGFX_MIN_INITIAL_HEIGHT}; vmw_du_update_layout(dev_priv, 1, &def_rect); return 0; + } else if (arg->num_outputs > VMWGFX_NUM_DISPLAY_UNITS) { + return -E2BIG; }
rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 19a843da87b78..ec86f92517a14 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -198,9 +198,6 @@ struct vmw_kms_dirty { s32 unit_y2; };
-#define VMWGFX_NUM_DISPLAY_UNITS 8 - - #define vmw_framebuffer_to_vfb(x) \ container_of(x, struct vmw_framebuffer, base) #define vmw_framebuffer_to_vfbs(x) \
 
            From: Showrya M N showrya@chelsio.com
[ Upstream commit 4e1e3dd88a4cedd5ccc1a3fc3d71e03b70a7a791 ]
While running ISER over SIW, the initiator machine encounters a warning from skb_splice_from_iter() indicating that a slab page is being used in send_page. To address this, it is better to add a sendpage_ok() check within the driver itself, and if it returns 0, then MSG_SPLICE_PAGES flag should be disabled before entering the network stack.
A similar issue has been discussed for NVMe in this thread: https://lore.kernel.org/all/20240530142417.146696-1-ofir.gal@volumez.com/
WARNING: CPU: 0 PID: 5342 at net/core/skbuff.c:7140 skb_splice_from_iter+0x173/0x320 Call Trace: tcp_sendmsg_locked+0x368/0xe40 siw_tx_hdt+0x695/0xa40 [siw] siw_qp_sq_process+0x102/0xb00 [siw] siw_sq_resume+0x39/0x110 [siw] siw_run_sq+0x74/0x160 [siw] kthread+0xd2/0x100 ret_from_fork+0x34/0x40 ret_from_fork_asm+0x1a/0x30
Link: https://patch.msgid.link/r/20241007125835.89942-1-showrya@chelsio.com Signed-off-by: Showrya M N showrya@chelsio.com Signed-off-by: Potnuri Bharat Teja bharat@chelsio.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/sw/siw/siw_qp_tx.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 60b6a41359611..feae920784be8 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -337,6 +337,8 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, msg.msg_flags &= ~MSG_MORE;
tcp_rate_check_app_limited(sk); + if (!sendpage_ok(page[i])) + msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
 
            From: Will Deacon will@kernel.org
[ Upstream commit 7aed6a2c51ffc97a126e0ea0c270fab7af97ae18 ]
Syzbot reports a KASAN failure early during boot on arm64 when building with GCC 12.2.0 and using the Software Tag-Based KASAN mode:
| BUG: KASAN: invalid-access in smp_build_mpidr_hash arch/arm64/kernel/setup.c:133 [inline] | BUG: KASAN: invalid-access in setup_arch+0x984/0xd60 arch/arm64/kernel/setup.c:356 | Write of size 4 at addr 03ff800086867e00 by task swapper/0 | Pointer tag: [03], memory tag: [fe]
Initial triage indicates that the report is a false positive and a thorough investigation of the crash by Mark Rutland revealed the root cause to be a bug in GCC:
When GCC is passed `-fsanitize=hwaddress` or `-fsanitize=kernel-hwaddress` it ignores `__attribute__((no_sanitize_address))`, and instruments functions we require are not instrumented.
[...]
All versions [of GCC] I tried were broken, from 11.3.0 to 14.2.0 inclusive.
I think we have to disable KASAN_SW_TAGS with GCC until this is fixed
Disable Software Tag-Based KASAN when building with GCC by making CC_HAS_KASAN_SW_TAGS depend on !CC_IS_GCC.
Cc: Andrey Konovalov andreyknvl@gmail.com Suggested-by: Mark Rutland mark.rutland@arm.com Reported-by: syzbot+908886656a02769af987@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/000000000000f362e80620e27859@google.com Link: https://lore.kernel.org/r/ZvFGwKfoC4yVjN_X@J2N7QTR9R3 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218854 Reviewed-by: Andrey Konovalov andreyknvl@gmail.com Acked-by: Mark Rutland mark.rutland@arm.com Link: https://lore.kernel.org/r/20241014161100.18034-1-will@kernel.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- lib/Kconfig.kasan | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index fdca89c057452..275e6295fcd78 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -22,8 +22,11 @@ config ARCH_DISABLE_KASAN_INLINE config CC_HAS_KASAN_GENERIC def_bool $(cc-option, -fsanitize=kernel-address)
+# GCC appears to ignore no_sanitize_address when -fsanitize=kernel-hwaddress +# is passed. See https://bugzilla.kernel.org/show_bug.cgi?id=218854 (and +# the linked LKML thread) for more details. config CC_HAS_KASAN_SW_TAGS - def_bool $(cc-option, -fsanitize=kernel-hwaddress) + def_bool !CC_IS_GCC && $(cc-option, -fsanitize=kernel-hwaddress)
# This option is only required for software KASAN modes. # Old GCC versions do not have proper support for no_sanitize_address. @@ -100,7 +103,7 @@ config KASAN_SW_TAGS help Enables Software Tag-Based KASAN.
- Requires GCC 11+ or Clang. + Requires Clang.
Supported only on arm64 CPUs and relies on Top Byte Ignore.
 
            From: Keith Busch kbusch@kernel.org
[ Upstream commit 1f021341eef41e77a633186e9be5223de2ce5d48 ]
We need to suppress the partition scan from occuring within the controller's scan_work context. If a path error occurs here, the IO will wait until a path becomes available or all paths are torn down, but that action also occurs within scan_work, so it would deadlock. Defer the partion scan to a different context that does not block scan_work.
Reported-by: Hannes Reinecke hare@suse.de Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/multipath.c | 33 +++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 34 insertions(+)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 37ea0fa421da8..ede2a14dad8be 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -499,6 +499,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) return ret; }
+static void nvme_partition_scan_work(struct work_struct *work) +{ + struct nvme_ns_head *head = + container_of(work, struct nvme_ns_head, partition_scan_work); + + if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN, + &head->disk->state))) + return; + + mutex_lock(&head->disk->open_mutex); + bdev_disk_changed(head->disk, false); + mutex_unlock(&head->disk->open_mutex); +} + static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -525,6 +539,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) bio_list_init(&head->requeue_list); spin_lock_init(&head->requeue_lock); INIT_WORK(&head->requeue_work, nvme_requeue_work); + INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
/* * Add a multipath node if the subsystems supports multiple controllers. @@ -540,6 +555,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) return -ENOMEM; head->disk->fops = &nvme_ns_head_ops; head->disk->private_data = head; + + /* + * We need to suppress the partition scan from occuring within the + * controller's scan_work context. If a path error occurs here, the IO + * will wait until a path becomes available or all paths are torn down, + * but that action also occurs within scan_work, so it would deadlock. + * Defer the partion scan to a different context that does not block + * scan_work. + */ + set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance);
@@ -589,6 +614,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return; } nvme_add_ns_head_cdev(head); + kblockd_schedule_work(&head->partition_scan_work); }
mutex_lock(&head->lock); @@ -889,6 +915,12 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) kblockd_schedule_work(&head->requeue_work); if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { nvme_cdev_del(&head->cdev, &head->cdev_device); + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared + * to allow multipath to fail all I/O. + */ + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); del_gendisk(head->disk); } } @@ -900,6 +932,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); + flush_work(&head->partition_scan_work); put_disk(head->disk); }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 799f8a2bb0b4f..14a867245c29f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -476,6 +476,7 @@ struct nvme_ns_head { struct bio_list requeue_list; spinlock_t requeue_lock; struct work_struct requeue_work; + struct work_struct partition_scan_work; struct mutex lock; unsigned long flags; #define NVME_NSHEAD_DISK_LIVE 0
 
            From: Philip Yang Philip.Yang@amd.com
[ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ]
Process device data pdd->vram_usage is read by rocm-smi via sysfs, this is currently missing the svm_bo usage accounting, so "rocm-smi --showpids" per process VRAM usage report is incorrect.
Add pdd->vram_usage accounting when svm_bo allocation and release, change to atomic64_t type because it is updated outside process mutex now.
Signed-off-by: Philip Yang Philip.Yang@amd.com Reviewed-by: Felix Kuehling felix.kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com (cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04) Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 ++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 19d46be639429..8669677662d0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1164,7 +1164,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) size >>= 1; - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); }
mutex_unlock(&p->mutex); @@ -1235,7 +1235,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage);
err_unlock: err_pdd: @@ -2352,7 +2352,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 67204c3dfbb8f..27c9d5c43765a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -765,7 +765,7 @@ struct kfd_process_device { enum kfd_pdd_bound bound;
/* VRAM usage */ - uint64_t vram_usage; + atomic64_t vram_usage; struct attribute attr_vram; char vram_filename[MAX_SYSFS_FILENAME_LEN];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43f520b379670..6c90231e0aec2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -1589,7 +1589,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ce76d45549984..6b7c6f45a80a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -391,6 +391,27 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); + + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + struct kfd_process_device *pdd; + struct kfd_process *p; + struct mm_struct *mm; + + mm = svm_bo->eviction_fence->mm; + /* + * The forked child process takes svm_bo device pages ref, svm_bo could be + * released after parent process is gone. + */ + p = kfd_lookup_process_by_mm(mm); + if (p) { + pdd = kfd_get_process_device_data(svm_bo->node, p); + if (pdd) + atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage); + kfd_unref_process(p); + } + mmput(mm); + } + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) /* We're not in the eviction worker. Signal the fence. */ dma_fence_signal(&svm_bo->eviction_fence->base); @@ -518,6 +539,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear) { + struct kfd_process_device *pdd; struct amdgpu_bo_param bp; struct svm_range_bo *svm_bo; struct amdgpu_bo_user *ubo; @@ -609,6 +631,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, list_add(&prange->svm_bo_list, &svm_bo->range_list); spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node); + if (pdd) + atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage); + return 0;
reserve_bo_failed:
 
            From: Michael Ellerman mpe@ellerman.id.au
[ Upstream commit cf8989d20d64ad702a6210c11a0347ebf3852aa7 ]
In opal_event_init() if request_irq() fails name is not freed, leading to a memory leak. The code only runs at boot time, there's no way for a user to trigger it, so there's no security impact.
Fix the leak by freeing name in the error path.
Reported-by: 2639161967 2639161967@qq.com Closes: https://lore.kernel.org/linuxppc-dev/87wmjp3wig.fsf@mail.lhotse Signed-off-by: Michael Ellerman mpe@ellerman.id.au Link: https://patch.msgid.link/20240920093520.67997-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/platforms/powernv/opal-irqchip.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 56a1f7ce78d2c..d92759c21fae9 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -282,6 +282,7 @@ int __init opal_event_init(void) name, NULL); if (rc) { pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); + kfree(name); continue; } }
 
            From: Linus Walleij linus.walleij@linaro.org
[ Upstream commit 906b77ca91c7e9833b4e47bedb6bec76be71d497 ]
Add Unimac mdio compatible string for the special BCM6846 variant.
This variant has a few extra registers compared to other versions.
Suggested-by: Florian Fainelli florian.fainelli@broadcom.com Link: https://lore.kernel.org/linux-devicetree/b542b2e8-115c-4234-a464-e73aa6bece5... Signed-off-by: Linus Walleij linus.walleij@linaro.org Link: https://patch.msgid.link/20241012-bcm6846-mdio-v1-2-c703ca83e962@linaro.org Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/mdio/mdio-bcm-unimac.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index 6b26a0803696d..a29838be335c9 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -336,6 +336,7 @@ static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops, static const struct of_device_id unimac_mdio_ids[] = { { .compatible = "brcm,asp-v2.1-mdio", }, { .compatible = "brcm,asp-v2.0-mdio", }, + { .compatible = "brcm,bcm6846-mdio", }, { .compatible = "brcm,genet-mdio-v5", }, { .compatible = "brcm,genet-mdio-v4", }, { .compatible = "brcm,genet-mdio-v3", },
 
            From: Nilay Shroff nilay@linux.ibm.com
[ Upstream commit c199fac88fe7c749f88a0653e9f621b9f5a71cf1 ]
While shutting down loop controller, we first quiesce the admin/IO queue, delete the admin/IO tag-set and then at last destroy the admin/IO queue. However it's quite possible that during the window between quiescing and destroying of the admin/IO queue, some admin/IO request might sneak in and if that happens then we could potentially encounter a hung task because shutdown operation can't forward progress until any pending I/O is flushed off.
This commit helps ensure that before destroying the admin/IO queue, we unquiesce the admin/IO queue so that any outstanding requests, which are added after the admin/IO queue is quiesced, are now flushed to its completion.
Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Nilay Shroff nilay@linux.ibm.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/target/loop.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 48d5df054cd02..bd61a1b82c4cd 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -265,6 +265,13 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) return; + /* + * It's possible that some requests might have been added + * after admin queue is stopped/quiesced. So now start the + * queue to flush these requests to the completion. + */ + nvme_unquiesce_admin_queue(&ctrl->ctrl); + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); nvme_remove_admin_tag_set(&ctrl->ctrl); } @@ -297,6 +304,12 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); } ctrl->ctrl.queue_count = 1; + /* + * It's possible that some requests might have been added + * after io queue is stopped/quiesced. So now start the + * queue to flush these requests to the completion. + */ + nvme_unquiesce_io_queues(&ctrl->ctrl); }
static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 
            From: Nilay Shroff nilay@linux.ibm.com
[ Upstream commit d06923670b5a5f609603d4a9fee4dec02d38de9c ]
The nvme keep-alive operation, which executes at a periodic interval, could potentially sneak in while shutting down a fabric controller. This may lead to a race between the fabric controller admin queue destroy code path (invoked while shutting down controller) and hw/hctx queue dispatcher called from the nvme keep-alive async request queuing operation. This race could lead to the kernel crash shown below:
Call Trace: autoremove_wake_function+0x0/0xbc (unreliable) __blk_mq_sched_dispatch_requests+0x114/0x24c blk_mq_sched_dispatch_requests+0x44/0x84 blk_mq_run_hw_queue+0x140/0x220 nvme_keep_alive_work+0xc8/0x19c [nvme_core] process_one_work+0x200/0x4e0 worker_thread+0x340/0x504 kthread+0x138/0x140 start_kernel_thread+0x14/0x18
While shutting down fabric controller, if nvme keep-alive request sneaks in then it would be flushed off. The nvme_keep_alive_end_io function is then invoked to handle the end of the keep-alive operation which decrements the admin->q_usage_counter and assuming this is the last/only request in the admin queue then the admin->q_usage_counter becomes zero. If that happens then blk-mq destroy queue operation (blk_mq_destroy_ queue()) which could be potentially running simultaneously on another cpu (as this is the controller shutdown code path) would forward progress and deletes the admin queue. So, now from this point onward we are not supposed to access the admin queue resources. However the issue here's that the nvme keep-alive thread running hw/hctx queue dispatch operation hasn't yet finished its work and so it could still potentially access the admin queue resource while the admin queue had been already deleted and that causes the above crash.
This fix helps avoid the observed crash by implementing keep-alive as a synchronous operation so that we decrement admin->q_usage_counter only after keep-alive command finished its execution and returns the command status back up to its caller (blk_execute_rq()). This would ensure that fabric shutdown code path doesn't destroy the fabric admin queue until keep-alive request finished execution and also keep-alive thread is not running hw/hctx queue dispatch operation.
Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Nilay Shroff nilay@linux.ibm.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e25206c7de80c..b3c5460c6d768 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1178,10 +1178,9 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) nvme_keep_alive_work_period(ctrl)); }
-static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, - blk_status_t status) +static void nvme_keep_alive_finish(struct request *rq, + blk_status_t status, struct nvme_ctrl *ctrl) { - struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long flags; bool startka = false; unsigned long rtt = jiffies - (rq->deadline - rq->timeout); @@ -1199,13 +1198,11 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, delay = 0; }
- blk_mq_free_request(rq); - if (status) { dev_err(ctrl->device, "failed nvme_keep_alive_end_io error=%d\n", status); - return RQ_END_IO_NONE; + return; }
ctrl->ka_last_check_time = jiffies; @@ -1217,7 +1214,6 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); - return RQ_END_IO_NONE; }
static void nvme_keep_alive_work(struct work_struct *work) @@ -1226,6 +1222,7 @@ static void nvme_keep_alive_work(struct work_struct *work) struct nvme_ctrl, ka_work); bool comp_seen = ctrl->comp_seen; struct request *rq; + blk_status_t status;
ctrl->ka_last_check_time = jiffies;
@@ -1248,9 +1245,9 @@ static void nvme_keep_alive_work(struct work_struct *work) nvme_init_request(rq, &ctrl->ka_cmd);
rq->timeout = ctrl->kato * HZ; - rq->end_io = nvme_keep_alive_end_io; - rq->end_io_data = ctrl; - blk_execute_rq_nowait(rq, false); + status = blk_execute_rq(rq, false); + nvme_keep_alive_finish(rq, status, ctrl); + blk_mq_free_request(rq); }
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
linux-stable-mirror@lists.linaro.org
