The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 10b591e7fb7cdc8c1e53e9c000dc0ef7069aaa76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025122922-tyke-slip-919d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 10b591e7fb7cdc8c1e53e9c000dc0ef7069aaa76 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Wed, 22 Oct 2025 11:06:36 +0800
Subject: [PATCH] f2fs: fix to avoid updating compression context during writeback
Bai, Shuangpeng sjb7183@psu.edu reported a bug as below:
Oops: divide error: 0000 [#1] SMP KASAN PTI
CPU: 0 UID: 0 PID: 11441 Comm: syz.0.46 Not tainted 6.17.0 #1 PREEMPT(full)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
RIP: 0010:f2fs_all_cluster_page_ready+0x106/0x550 fs/f2fs/compress.c:857
Call Trace:
 <TASK>
 f2fs_write_cache_pages fs/f2fs/data.c:3078 [inline]
 __f2fs_write_data_pages fs/f2fs/data.c:3290 [inline]
 f2fs_write_data_pages+0x1c19/0x3600 fs/f2fs/data.c:3317
 do_writepages+0x38e/0x640 mm/page-writeback.c:2634
 filemap_fdatawrite_wbc mm/filemap.c:386 [inline]
 __filemap_fdatawrite_range mm/filemap.c:419 [inline]
 file_write_and_wait_range+0x2ba/0x3e0 mm/filemap.c:794
 f2fs_do_sync_file+0x6e6/0x1b00 fs/f2fs/file.c:294
 generic_write_sync include/linux/fs.h:3043 [inline]
 f2fs_file_write_iter+0x76e/0x2700 fs/f2fs/file.c:5259
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x7e9/0xe00 fs/read_write.c:686
 ksys_write+0x19d/0x2d0 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xf7/0x470 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
The bug was triggered w/ below race condition:
fsync                                   setattr                 ioctl
- f2fs_do_sync_file
 - file_write_and_wait_range
  - f2fs_write_cache_pages
  : inode is non-compressed
  : cc.cluster_size =
    F2FS_I(inode)->i_cluster_size = 0
   - tag_pages_for_writeback
                                        - f2fs_setattr
                                         - truncate_setsize
                                         - f2fs_truncate
                                                                - f2fs_fileattr_set
                                                                 - f2fs_setflags_common
                                                                  - set_compress_context
                                                                  : F2FS_I(inode)->i_cluster_size = 4
                                                                  : set_inode_flag(inode, FI_COMPRESSED_FILE)
   - f2fs_compressed_file
   : return true
   - f2fs_all_cluster_page_ready
   : "pgidx % cc->cluster_size" trigger dividing 0 issue
Let's change as below to fix this issue:
- introduce a new atomic variable, .writeback, in struct f2fs_inode_info to track the number of threads that are calling f2fs_write_cache_pages().
- use the .i_sem lock to protect updates of .writeback.
- check .writeback before updating the compression context in f2fs_setflags_common() to avoid racing w/ ->writepages.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Cc: stable@kernel.org
Reported-by: Bai, Shuangpeng <sjb7183@psu.edu>
Tested-by: Bai, Shuangpeng <sjb7183@psu.edu>
Closes: https://lore.kernel.org/lkml/44D8F7B3-68AD-425F-9915-65D27591F93F@psu.edu
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eec691262fec..b92d362a02d6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3222,6 +3222,19 @@ static inline bool __should_serialize_io(struct inode *inode, return false; }
+static inline void account_writeback(struct inode *inode, bool inc) +{ + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return; + + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (inc) + atomic_inc(&F2FS_I(inode)->writeback); + else + atomic_dec(&F2FS_I(inode)->writeback); + f2fs_up_read(&F2FS_I(inode)->i_sem); +} + static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) @@ -3267,10 +3280,14 @@ static int __f2fs_write_data_pages(struct address_space *mapping, locked = true; }
+ account_writeback(inode, true); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug);
+ account_writeback(inode, false); + if (locked) mutex_unlock(&sbi->writepages);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 575f9666c3b7..e69b01c1173a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -947,6 +947,7 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ unsigned char i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ + atomic_t writeback; /* count # of writeback thread */
unsigned int atomic_write_cnt; loff_t original_i_size; /* original i_size before atomic write */ @@ -4663,7 +4664,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) f2fs_up_write(&fi->i_sem); return true; } - if (f2fs_is_mmap_file(inode) || + if (f2fs_is_mmap_file(inode) || atomic_read(&fi->writeback) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&fi->i_sem); return false; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c045e38e60ee..6d42e2d28861 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2128,8 +2128,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
f2fs_down_write(&fi->i_sem); if (!f2fs_may_compress(inode) || - (S_ISREG(inode->i_mode) && - F2FS_HAS_BLOCKS(inode))) { + atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && + F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&fi->i_sem); return -EINVAL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cb65ca90f9f6..d0b5791a1f8c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1759,6 +1759,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); atomic_set(&fi->open_count, 0); + atomic_set(&fi->writeback, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list);
From: Chao Yu chao@kernel.org
[ Upstream commit 968c4f72b23c0c8f1e94e942eab89b8c5a3022e7 ]
After commit 3db1de0e582c ("f2fs: change the current atomic write way"), we removed all GC_FAILURE_ATOMIC usage, so let's change the i_gc_failures[] array to a single i_gc_failures counter for cleanup.
Meanwhile, let's define i_current_depth and i_gc_failures as members of a union, since they are never valid at the same time.
Signed-off-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Stable-dep-of: 10b591e7fb7c ("f2fs: fix to avoid updating compression context during writeback") Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/f2fs.h | 14 +++++--------- fs/f2fs/file.c | 12 +++++------- fs/f2fs/inode.c | 6 ++---- fs/f2fs/recovery.c | 3 +-- 4 files changed, 13 insertions(+), 22 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 406243395b94..9112c3140ede 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -756,11 +756,6 @@ enum {
#define DEF_DIR_LEVEL 0
-enum { - GC_FAILURE_PIN, - MAX_GC_FAILURE -}; - /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -808,9 +803,10 @@ struct f2fs_inode_info { unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* only for directory depth */ - /* for gc failure statistic */ - unsigned int i_gc_failures[MAX_GC_FAILURE]; + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned int i_gc_failures; /* for gc failure statistic */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */
@@ -3167,7 +3163,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) static inline void f2fs_i_gc_failures_write(struct inode *inode, unsigned int count) { - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; + F2FS_I(inode)->i_gc_failures = count; f2fs_mark_inode_dirty_sync(inode, true); }
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6267ba6ef108..31d20800b475 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3326,13 +3326,11 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
/* Use i_gc_failures for normal file as a risk signal. */ if (inc) - f2fs_i_gc_failures_write(inode, - fi->i_gc_failures[GC_FAILURE_PIN] + 1); + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
- if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + __func__, inode->i_ino, fi->i_gc_failures); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } @@ -3401,7 +3399,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) }
set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + ret = F2FS_I(inode)->i_gc_failures; done: f2fs_update_time(sbi, REQ_TIME); out: @@ -3416,7 +3414,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0;
if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + pin = F2FS_I(inode)->i_gc_failures; return put_user(pin, (u32 __user *)arg); }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 76ec2899cbe8..c67dbe4839e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -428,8 +428,7 @@ static int do_read_inode(struct inode *inode) if (S_ISDIR(inode->i_mode)) fi->i_current_depth = le32_to_cpu(ri->i_current_depth); else if (S_ISREG(inode->i_mode)) - fi->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(ri->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) @@ -691,8 +690,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = - cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); + ri->i_gc_failures = cpu_to_le16(F2FS_I(inode)->i_gc_failures); ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f8852aa52640..223fcdf785f7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -330,8 +330,7 @@ static int recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(raw->i_gc_failures); + F2FS_I(inode)->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
From: Jaegeuk Kim jaegeuk@kernel.org
[ Upstream commit ef0c333cad8d1940f132a7ce15f15920216a3bd5 ]
This patch records POSIX_FADV_NOREUSE ranges so that users can reclaim the cached pages off the LRU instantly.
Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Stable-dep-of: 10b591e7fb7c ("f2fs: fix to avoid updating compression context during writeback") Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 12 +++++++++- fs/f2fs/file.c | 60 ++++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/inode.c | 14 ++++++++++++ fs/f2fs/super.c | 1 + 5 files changed, 84 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0d02224b99b7..68773b48ed4d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -100,6 +100,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndonate_files = sbi->donate_files; si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->aw_cnt = atomic_read(&sbi->atomic_files); @@ -436,6 +437,8 @@ static int stat_show(struct seq_file *s, void *v) si->compr_inode, si->compr_blocks); seq_printf(s, " - Swapfile Inode: %u\n", si->swapfile_inode); + seq_printf(s, " - Donate Inode: %u\n", + si->ndonate_files); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9112c3140ede..460c29f95c17 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -831,6 +831,11 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + + /* linked in global inode list for cache donation */ + struct list_head gdonate_list; + pgoff_t donate_start, donate_end; /* inclusive */ + struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ @@ -1255,6 +1260,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + DONATE_INODE, /* for all inode to donate pages */ NR_INODE_TYPE, };
@@ -1607,6 +1613,9 @@ struct f2fs_sb_info { unsigned int warm_data_age_threshold; unsigned int last_age_weight;
+ /* control donate caches */ + unsigned int donate_files; + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ @@ -3943,7 +3952,8 @@ struct f2fs_stat_info { unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int nquota_files, ndonate_files; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 31d20800b475..564d83e67043 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2432,6 +2432,52 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) return ret; }
+static void f2fs_keep_noreuse_range(struct inode *inode, + loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); + u64 start, end; + + if (!S_ISREG(inode->i_mode)) + return; + + if (offset >= max_bytes || len > max_bytes || + (offset + len) > max_bytes) + return; + + start = offset >> PAGE_SHIFT; + end = DIV_ROUND_UP(offset + len, PAGE_SIZE); + + inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + inode_unlock(inode); + return; + } + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + /* let's remove the range, if len = 0 */ + if (!len) { + if (!list_empty(&F2FS_I(inode)->gdonate_list)) { + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + } + } else { + if (list_empty(&F2FS_I(inode)->gdonate_list)) { + list_add_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + sbi->donate_files++; + } else { + list_move_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + } + F2FS_I(inode)->donate_start = start; + F2FS_I(inode)->donate_end = end - 1; + } + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + inode_unlock(inode); +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -5075,12 +5121,16 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, }
err = generic_fadvise(filp, offset, len, advice); - if (!err && advice == POSIX_FADV_DONTNEED && - test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && - f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + if (err) + return err;
- return err; + if (advice == POSIX_FADV_DONTNEED && + (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && + f2fs_compressed_file(inode))) + f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + else if (advice == POSIX_FADV_NOREUSE) + f2fs_keep_noreuse_range(inode, offset, len); + return 0; }
#ifdef CONFIG_COMPAT diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c67dbe4839e7..7a961672737e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -811,6 +811,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; }
+static void f2fs_remove_donate_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (list_empty(&F2FS_I(inode)->gdonate_list)) + return; + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + spin_unlock(&sbi->inode_lock[DONATE_INODE]); +} + /* * Called at the last iput() if i_nlink is zero */ @@ -844,6 +857,7 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); + f2fs_remove_donate_inode(inode);
f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b9913ab526fd..ffff81caa244 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1413,6 +1413,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->gdonate_list); init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); init_f2fs_rwsem(&fi->i_xattr_sem);
From: Jaegeuk Kim jaegeuk@kernel.org
[ Upstream commit 078cad8212ce4f4ebbafcc0936475b8215e1ca2a ]
Let's drop the inode from the donation list when there is no other open file.
Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Stable-dep-of: 10b591e7fb7c ("f2fs: fix to avoid updating compression context during writeback") Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 8 +++++++- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 1 + 4 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 460c29f95c17..4b295671df8b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -835,6 +835,7 @@ struct f2fs_inode_info { /* linked in global inode list for cache donation */ struct list_head gdonate_list; pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */
struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; @@ -3554,6 +3555,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_update_inode(struct inode *inode, struct page *node_page); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); void f2fs_handle_failed_inode(struct inode *inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 564d83e67043..7bdf0da5ba69 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -621,7 +621,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err;
- return finish_preallocate_blocks(inode); + err = finish_preallocate_blocks(inode); + if (!err) + atomic_inc(&F2FS_I(inode)->open_count); + return err; }
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -1963,6 +1966,9 @@ static long f2fs_fallocate(struct file *file, int mode,
static int f2fs_release_file(struct inode *inode, struct file *filp) { + if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) + f2fs_remove_donate_inode(inode); + /* * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7a961672737e..4ba5642148b5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -811,7 +811,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; }
-static void f2fs_remove_donate_inode(struct inode *inode) +void f2fs_remove_donate_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ffff81caa244..0523e21fa951 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1409,6 +1409,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); + atomic_set(&fi->open_count, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list);
From: Chao Yu chao@kernel.org
[ Upstream commit 10b591e7fb7cdc8c1e53e9c000dc0ef7069aaa76 ]
Bai, Shuangpeng sjb7183@psu.edu reported a bug as below:
Oops: divide error: 0000 [#1] SMP KASAN PTI
CPU: 0 UID: 0 PID: 11441 Comm: syz.0.46 Not tainted 6.17.0 #1 PREEMPT(full)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
RIP: 0010:f2fs_all_cluster_page_ready+0x106/0x550 fs/f2fs/compress.c:857
Call Trace:
 <TASK>
 f2fs_write_cache_pages fs/f2fs/data.c:3078 [inline]
 __f2fs_write_data_pages fs/f2fs/data.c:3290 [inline]
 f2fs_write_data_pages+0x1c19/0x3600 fs/f2fs/data.c:3317
 do_writepages+0x38e/0x640 mm/page-writeback.c:2634
 filemap_fdatawrite_wbc mm/filemap.c:386 [inline]
 __filemap_fdatawrite_range mm/filemap.c:419 [inline]
 file_write_and_wait_range+0x2ba/0x3e0 mm/filemap.c:794
 f2fs_do_sync_file+0x6e6/0x1b00 fs/f2fs/file.c:294
 generic_write_sync include/linux/fs.h:3043 [inline]
 f2fs_file_write_iter+0x76e/0x2700 fs/f2fs/file.c:5259
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x7e9/0xe00 fs/read_write.c:686
 ksys_write+0x19d/0x2d0 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xf7/0x470 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
The bug was triggered w/ below race condition:
fsync                                   setattr                 ioctl
- f2fs_do_sync_file
 - file_write_and_wait_range
  - f2fs_write_cache_pages
  : inode is non-compressed
  : cc.cluster_size =
    F2FS_I(inode)->i_cluster_size = 0
   - tag_pages_for_writeback
                                        - f2fs_setattr
                                         - truncate_setsize
                                         - f2fs_truncate
                                                                - f2fs_fileattr_set
                                                                 - f2fs_setflags_common
                                                                  - set_compress_context
                                                                  : F2FS_I(inode)->i_cluster_size = 4
                                                                  : set_inode_flag(inode, FI_COMPRESSED_FILE)
   - f2fs_compressed_file
   : return true
   - f2fs_all_cluster_page_ready
   : "pgidx % cc->cluster_size" trigger dividing 0 issue
Let's change as below to fix this issue:
- introduce a new atomic variable, .writeback, in struct f2fs_inode_info to track the number of threads that are calling f2fs_write_cache_pages().
- use the .i_sem lock to protect updates of .writeback.
- check .writeback before updating the compression context in f2fs_setflags_common() to avoid racing w/ ->writepages.
Fixes: 4c8ff7095bef ("f2fs: support data compression") Cc: stable@kernel.org Reported-by: Bai, Shuangpeng sjb7183@psu.edu Tested-by: Bai, Shuangpeng sjb7183@psu.edu Closes: https://lore.kernel.org/lkml/44D8F7B3-68AD-425F-9915-65D27591F93F@psu.edu Signed-off-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org [ Adjust context ] Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/data.c | 17 +++++++++++++++++ fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 5 +++-- fs/f2fs/super.c | 1 + 4 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f5252f3e840a..c863e27fd846 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3233,6 +3233,19 @@ static inline bool __should_serialize_io(struct inode *inode, return false; }
+static inline void account_writeback(struct inode *inode, bool inc) +{ + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return; + + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (inc) + atomic_inc(&F2FS_I(inode)->writeback); + else + atomic_dec(&F2FS_I(inode)->writeback); + f2fs_up_read(&F2FS_I(inode)->i_sem); +} + static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) @@ -3282,10 +3295,14 @@ static int __f2fs_write_data_pages(struct address_space *mapping, locked = true; }
+ account_writeback(inode, true); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug);
+ account_writeback(inode, false); + if (locked) mutex_unlock(&sbi->writepages);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4b295671df8b..b45f7ce568e6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -863,6 +863,7 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ unsigned char i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ + atomic_t writeback; /* count # of writeback thread */
unsigned int atomic_write_cnt; loff_t original_i_size; /* original i_size before atomic write */ @@ -4480,7 +4481,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) f2fs_up_write(&F2FS_I(inode)->i_sem); return true; } - if (f2fs_is_mmap_file(inode) || + if (f2fs_is_mmap_file(inode) || atomic_read(&fi->writeback) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&F2FS_I(inode)->i_sem); return false; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7bdf0da5ba69..8cc0b7f5c35d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2042,8 +2042,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
f2fs_down_write(&F2FS_I(inode)->i_sem); if (!f2fs_may_compress(inode) || - (S_ISREG(inode->i_mode) && - F2FS_HAS_BLOCKS(inode))) { + atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && + F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&F2FS_I(inode)->i_sem); return -EINVAL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0523e21fa951..30b57755ceef 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1410,6 +1410,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); atomic_set(&fi->open_count, 0); + atomic_set(&fi->writeback, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list);
linux-stable-mirror@lists.linaro.org