The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100148-utensil-cornbread-3a99@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100147-unsolved-revision-357a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100147-untaxed-viscosity-628e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100146-petite-uncoated-28c9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7ee85f5515e86a4e2a2f51969795920733912bad
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100131-uncharted-flyaway-8acd@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
7ee85f5515e8 ("btrfs: fix race setting file private on concurrent lseek using same fd")
68539bd0e73b ("btrfs: update comment for struct btrfs_inode::lock")
398fb9131f31 ("btrfs: reorder btrfs_inode to fill gaps")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ee85f5515e86a4e2a2f51969795920733912bad Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Tue, 3 Sep 2024 10:55:36 +0100
Subject: [PATCH] btrfs: fix race setting file private on concurrent lseek
using same fd
When doing concurrent lseek(2) system calls against the same file
descriptor, using multiple threads belonging to the same process, we have
a short time window where a race happens and can result in a memory leak.
The race happens like this:
1) A program opens a file descriptor for a file and then spawns two
threads (with the pthreads library for example), let's call them
task A and task B;
2) Task A calls lseek with SEEK_DATA or SEEK_HOLE and ends up at
file.c:find_desired_extent() while holding a read lock on the inode;
3) At the start of find_desired_extent(), it extracts the file's
private_data pointer into a local variable named 'private', which has
a value of NULL;
4) Task B also calls lseek with SEEK_DATA or SEEK_HOLE, locks the inode
in shared mode and enters file.c:find_desired_extent(), where it also
extracts file->private_data into its local variable 'private', which
has a NULL value;
5) Because it saw a NULL file private, task A allocates a private
structure and assigns to the file structure;
6) Task B also saw a NULL file private so it also allocates its own file
private and then assigns it to the same file structure, since both
tasks are using the same file descriptor.
At this point we leak the private structure allocated by task A.
Besides the memory leak, there's also the detail that both tasks end up
using the same cached state record in the private structure (struct
btrfs_file_private::llseek_cached_state), which can result in a
use-after-free problem since one task can free it while the other is
still using it (only one task took a reference count on it). Also, sharing
the cached state is not a good idea since it could result in incorrect
results in the future - right now it should not be a problem because it
ends up being used only in extent-io-tree.c:count_range_bits() where we do
range validation before using the cached state.
Fix this by protecting the private assignment and check of a file while
holding the inode's spinlock and keep track of the task that allocated
the private, so that it's used only by that task in order to prevent
use-after-free issues with the cached state record as well as potentially
using it incorrectly in the future.
Fixes: 3c32c7212f16 ("btrfs: use cached state when looking for delalloc ranges with lseek")
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9a4b7c119318..e152fde888fc 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -152,6 +152,7 @@ struct btrfs_inode {
* logged_trans), to access/update delalloc_bytes, new_delalloc_bytes,
* defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to
* update the VFS' inode number of bytes used.
+ * Also protects setting struct file::private_data.
*/
spinlock_t lock;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1a44fb9845e3..317a3712270f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -463,6 +463,8 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
+ /* Task that allocated this structure. */
+ struct task_struct *owner_task;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c5e36f58eb07..4fb521d91b06 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3485,7 +3485,7 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
{
struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host);
- struct btrfs_file_private *private = file->private_data;
+ struct btrfs_file_private *private;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *cached_state = NULL;
struct extent_state **delalloc_cached_state;
@@ -3513,7 +3513,19 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
inode_get_bytes(&inode->vfs_inode) == i_size)
return i_size;
- if (!private) {
+ spin_lock(&inode->lock);
+ private = file->private_data;
+ spin_unlock(&inode->lock);
+
+ if (private && private->owner_task != current) {
+ /*
+ * Not allocated by us, don't use it as its cached state is used
+ * by the task that allocated it and we don't want neither to
+ * mess with it nor get incorrect results because it reflects an
+ * invalid state for the current task.
+ */
+ private = NULL;
+ } else if (!private) {
private = kzalloc(sizeof(*private), GFP_KERNEL);
/*
* No worries if memory allocation failed.
@@ -3521,7 +3533,23 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
* lseek SEEK_HOLE/DATA calls to a file when there's delalloc,
* so everything will still be correct.
*/
- file->private_data = private;
+ if (private) {
+ bool free = false;
+
+ private->owner_task = current;
+
+ spin_lock(&inode->lock);
+ if (file->private_data)
+ free = true;
+ else
+ file->private_data = private;
+ spin_unlock(&inode->lock);
+
+ if (free) {
+ kfree(private);
+ private = NULL;
+ }
+ }
}
if (private)
Ensure we serialize with completion side to prevent UAF with fence going
out of scope on the stack, since we have no clue if it will fire after
the timeout before we can erase from the xa. Also we have some dependent
loads and stores for which we need the correct ordering, and we lack the
needed barriers. Fix this by grabbing the ct->lock after the wait, which
is also held by the completion side.
v2 (Badal):
- Also print done after acquiring the lock and seeing timeout.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Badal Nilawar <badal.nilawar(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_guc_ct.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 4b95f75b1546..44263b3cd8c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -903,16 +903,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
if (!ret) {
- xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
return -ETIME;
}
if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
goto retry;
}
if (g2h_fence.fail) {
@@ -921,7 +931,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
ret = -EIO;
}
- return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
}
/**
--
2.46.2
Hi,
as discussed in [1], this is a manual backport of the remaining two
patches to let the io worker threads respect the affinities defined by
the cgroup of the process.
In 6.1 one worker is created per NUMA node, while in da64d6db3bd3
("io_uring: One wqe per wq") this is changed to only have a single worker.
As this patch is pretty invasive, Jens and I agreed to not backport it.
Instead we now limit the workers cpuset to the cpus that are in the
intersection between what the cgroup allows and what the NUMA node has.
This leaves the question of what to do in case the intersection is empty:
To be backward compatible, we allow this case, but restrict the cpumask
of the poller to the cpuset defined by the cgroup. We further believe
this is a reasonable decision, as da64d6db3bd3 drops the NUMA awareness
anyway.
[1] https://lore.kernel.org/lkml/ec01745a-b102-4f6e-abc9-abd636d36319@kernel.dk
Best regards,
Felix Moessbauer
Siemens AG
Felix Moessbauer (2):
io_uring/io-wq: do not allow pinning outside of cpuset
io_uring/io-wq: inherit cpuset of cgroup in io worker
io_uring/io-wq.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)
--
2.39.2