uevent_show() wants to de-reference dev->driver->name. There is no clean
way for a device attribute to de-reference dev->driver unless that
attribute is defined via (struct device_driver).dev_groups. Instead, the
anti-pattern of taking the device_lock() in the attribute handler risks
deadlocks with code paths that remove device attributes while holding
the lock.
This deadlock is typically invisible to lockdep given the device_lock()
is marked lockdep_set_novalidate_class(), but some subsystems allocate a
local lockdep key for @dev->mutex to reveal reports of the form:
======================================================
WARNING: possible circular locking dependency detected
6.10.0-rc7+ #275 Tainted: G OE N
------------------------------------------------------
modprobe/2374 is trying to acquire lock:
ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220
but task is already holding lock:
ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&cxl_root_key){+.+.}-{3:3}:
__mutex_lock+0x99/0xc30
uevent_show+0xac/0x130
dev_attr_show+0x18/0x40
sysfs_kf_seq_show+0xac/0xf0
seq_read_iter+0x110/0x450
vfs_read+0x25b/0x340
ksys_read+0x67/0xf0
do_syscall_64+0x75/0x190
entry_SYSCALL_64_after_hwframe+0x76/0x7e
-> #0 (kn->active#6){++++}-{0:0}:
__lock_acquire+0x121a/0x1fa0
lock_acquire+0xd6/0x2e0
kernfs_drain+0x1e9/0x200
__kernfs_remove+0xde/0x220
kernfs_remove_by_name_ns+0x5e/0xa0
device_del+0x168/0x410
device_unregister+0x13/0x60
devres_release_all+0xb8/0x110
device_unbind_cleanup+0xe/0x70
device_release_driver_internal+0x1c7/0x210
driver_detach+0x47/0x90
bus_remove_driver+0x6c/0xf0
cxl_acpi_exit+0xc/0x11 [cxl_acpi]
__do_sys_delete_module.isra.0+0x181/0x260
do_syscall_64+0x75/0x190
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The observation though is that driver objects are typically much longer
lived than device objects. It is reasonable to perform lockless
de-reference of a @driver pointer even if it is racing detach from a
device. Given the infrequency of driver unregistration, use
synchronize_rcu() in module_remove_driver() to close any potential
races. It is potentially overkill to suffer synchronize_rcu() just to
handle the rare event of module removal racing uevent_show().
Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1].
Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()")
Reported-by: syzbot+4762dd74e32532cda5ff(a)syzkaller.appspotmail.com
Reported-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.n… [1]
Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.n…
Cc: stable(a)vger.kernel.org
Cc: Ashish Sangwan <a.sangwan(a)samsung.com>
Cc: Namjae Jeon <namjae.jeon(a)samsung.com>
Cc: Dirk Behme <dirk.behme(a)de.bosch.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/base/core.c | 13 ++++++++-----
drivers/base/module.c | 4 ++++
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2b4c0624b704..b5399262198a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -25,6 +25,7 @@
#include <linux/mutex.h>
#include <linux/pm_runtime.h>
#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/string_helpers.h>
@@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj)
static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
const struct device *dev = kobj_to_dev(kobj);
+ struct device_driver *driver;
int retval = 0;
/* add device node properties if present */
@@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
if (dev->type && dev->type->name)
add_uevent_var(env, "DEVTYPE=%s", dev->type->name);
- if (dev->driver)
- add_uevent_var(env, "DRIVER=%s", dev->driver->name);
+ /* Synchronize with module_remove_driver() */
+ rcu_read_lock();
+ driver = READ_ONCE(dev->driver);
+ if (driver)
+ add_uevent_var(env, "DRIVER=%s", driver->name);
+ rcu_read_unlock();
/* Add common DT information about the device */
of_device_uevent(dev, env);
@@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr,
if (!env)
return -ENOMEM;
- /* Synchronize with really_probe() */
- device_lock(dev);
/* let the kset specific function add its keys */
retval = kset->uevent_ops->uevent(&dev->kobj, env);
- device_unlock(dev);
if (retval)
goto out;
diff --git a/drivers/base/module.c b/drivers/base/module.c
index a1b55da07127..b0b79b9c189d 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -7,6 +7,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/rcupdate.h>
#include "base.h"
static char *make_driver_name(struct device_driver *drv)
@@ -97,6 +98,9 @@ void module_remove_driver(struct device_driver *drv)
if (!drv)
return;
+ /* Synchronize with dev_uevent() */
+ synchronize_rcu();
+
sysfs_remove_link(&drv->p->kobj, "module");
if (drv->owner)
While calculating the end addresses of the main area and segment 0, u32
may not be enough to hold the result without the danger of integer
overflow.
Just in case, play it safe and cast one of the operands to a
wider type (u64).
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: fd694733d523 ("f2fs: cover large section in sanity check of super")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
fs/f2fs/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3959fd137cc9..4d8f38ca6fcd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3356,9 +3356,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
u32 segment_count = le32_to_cpu(raw_super->segment_count);
u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
u64 main_end_blkaddr = main_blkaddr +
- (segment_count_main << log_blocks_per_seg);
+ ((u64)segment_count_main << log_blocks_per_seg);
u64 seg_end_blkaddr = segment0_blkaddr +
- (segment_count << log_blocks_per_seg);
+ ((u64)segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
The result of multiplication between values derived from functions
dir_buckets() and bucket_blocks() *could* technically reach
2^30 * 2^2 = 2^32.
While unlikely to happen, it is prudent to ensure that it will not
lead to integer overflow. Thus, use mul_u32_u32() as it's more
appropriate to mitigate the issue.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: 3843154598a0 ("f2fs: introduce large directory support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
fs/f2fs/dir.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index cbd7a5e96a37..14900ca8a9ff 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -166,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level,
unsigned long bidx = 0;
for (i = 0; i < level; i++)
- bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
+ bidx += mul_u32_u32(dir_buckets(i, dir_level),
+ bucket_blocks(i));
bidx += idx * bucket_blocks(level);
return bidx;
}
When dealing with large extents and calculating file offsets by
summing up the corresponding extent offsets and lengths of unsigned int
type, one may encounter integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
As the patch covers several code fragments, one singular Fixes: tag
is hard to pinpoint. Hopefully, it's not critical at this stage.
fs/f2fs/extent_cache.c | 4 ++--
fs/f2fs/file.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
We should wait for all existing dio write IOs to complete before block
removal; otherwise, a previous direct write IO may overwrite data in a
block which may have been reused by another inode.
Cc: stable(a)vger.kernel.org
Signed-off-by: Chao Yu <chao(a)kernel.org>
---
fs/f2fs/file.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 76a6043caf27..f2d0e0de775f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1056,6 +1056,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
}
+ /*
+ * wait for inflight dio, blocks should be removed after
+ * IO completion.
+ */
+ if (attr->ia_size < old_size)
+ inode_dio_wait(inode);
+
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -1892,6 +1899,12 @@ static long f2fs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ /*
+ * wait for inflight dio, blocks should be removed after IO
+ * completion.
+ */
+ inode_dio_wait(inode);
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
goto out;
--
2.40.1
From: Jason Gerecke <jason.gerecke(a)wacom.com>
The Wacom driver maps the HID_DG_TWIST usage to ABS_Z (rather than ABS_RZ)
for historic reasons. When the code to support twist was introduced in
commit 50066a042da5 ("HID: wacom: generic: Add support for height, tilt,
and twist usages"), we were careful to write it in such a way that it had
HID calculate the resolution of the twist axis assuming ABS_RZ instead
(so that we would get correct angular behavior). This was broken with
the introduction of commit 08a46b4190d3 ("HID: wacom: Set a default
resolution for older tablets"), which moved the resolution calculation
to occur *before* the adjustment from ABS_Z to ABS_RZ occurred.
This commit moves the calculation of resolution after the point that
we are finished setting things up for its proper use.
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
Fixes: 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets")
Cc: stable(a)vger.kernel.org
---
drivers/hid/wacom_wac.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 1f4564982b958..2541fa2e0fa3b 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1878,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
int fmax = field->logical_maximum;
unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid);
int resolution_code = code;
- int resolution = hidinput_calc_abs_res(field, resolution_code);
+ int resolution;
if (equivalent_usage == HID_DG_TWIST) {
resolution_code = ABS_RZ;
}
+ resolution = hidinput_calc_abs_res(field, resolution_code);
+
if (equivalent_usage == HID_GD_X) {
fmin += features->offset_left;
fmax -= features->offset_right;
--
2.45.2
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6807352353561187a718e87204458999dbcbba1b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072916-pastrami-suction-5192@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
680735235356 ("ipv4: fix source address selection with route leak")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6807352353561187a718e87204458999dbcbba1b Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Date: Wed, 10 Jul 2024 10:14:27 +0200
Subject: [PATCH] ipv4: fix source address selection with route leak
By default, an address assigned to the output interface is selected when
the source address is not specified. This is problematic when a route,
configured in a vrf, uses an interface from another vrf (aka route leak).
The original vrf does not own the selected source address.
Let's add a check against the output interface and call the appropriate
function to select the source address.
CC: stable(a)vger.kernel.org
Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Reviewed-by: David Ahern <dsahern(a)kernel.org>
Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f669da98d11d..8956026bc0a2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
fib_select_default(fl4, res);
check_saddr:
- if (!fl4->saddr)
- fl4->saddr = fib_result_prefsrc(net, res);
+ if (!fl4->saddr) {
+ struct net_device *l3mdev;
+
+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
+
+ if (!l3mdev ||
+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
+ fl4->saddr = fib_result_prefsrc(net, res);
+ else
+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
+ }
}