commit 43bbddc067883d94de7a43d5756a295439fbe37d upstream.
When we use lstart + len to calculate the end of free extent or prealloc space, it may exceed the maximum value of 4294967295(0xffffffff) supported by ext4_lblk_t and cause overflow, which may lead to various problems.
Therefore, we add two helper functions, extent_logical_end() and pa_logical_end(), to limit the type of end to loff_t, and also convert lstart to loff_t for calculation to avoid overflow.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Link: https://lore.kernel.org/r/20230724121059.11834-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu
Conflicts: fs/ext4/mballoc.c
Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/ext4/mballoc.c | 7 +++---- fs/ext4/mballoc.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9099e112fda5..e07420c6ddee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3124,7 +3124,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* first, let's learn actual file size * given current request is allocated */ - size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + size = extent_logical_end(sbi, &ac->ac_o_ex); size = size << bsbits; if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); @@ -3462,8 +3462,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* all fields in this condition don't change, * so we can skip locking for them */ if (ac->ac_o_ex.fe_logical < pa->pa_lstart || - ac->ac_o_ex.fe_logical >= (pa->pa_lstart + - EXT4_C2B(sbi, pa->pa_len))) + ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, pa)) continue;
/* non-extent files can't have physical blocks past 2^32 */ @@ -4234,7 +4233,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) return;
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + size = extent_logical_end(sbi, &ac->ac_o_ex); isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) >> bsbits;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 88c98f17e3d9..4e442bad6d73 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -199,6 +199,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); }
+static inline loff_t extent_logical_end(struct ext4_sb_info *sbi, + struct ext4_free_extent *fex) +{ + /* Use loff_t to avoid end exceeding ext4_lblk_t max. */ + return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len); +} + +static inline loff_t pa_logical_end(struct ext4_sb_info *sbi, + struct ext4_prealloc_space *pa) +{ + /* Use loff_t to avoid end exceeding ext4_lblk_t max. */ + return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len); +} + typedef int (*ext4_mballoc_query_range_fn)( struct super_block *sb, ext4_group_t agno,
commit bc056e7163ac7db945366de219745cf94f32a3e6 upstream.
When we calculate the end position of ext4_free_extent, this position may be exactly where ext4_lblk_t (i.e. uint) overflows. For example, if ac_g_ex.fe_logical is 4294965248 and ac_orig_goal_len is 2048, then the computed end is 0x100000000, which is 0. If ac->ac_o_ex.fe_logical is not the first case of adjusting the best extent, that is, new_bex_end > 0, the following BUG_ON will be triggered:
========================================================= kernel BUG at fs/ext4/mballoc.c:5116! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 673 Comm: xfs_io Tainted: G E 6.5.0-rc1+ #279 RIP: 0010:ext4_mb_new_inode_pa+0xc5/0x430 Call Trace: <TASK> ext4_mb_use_best_found+0x203/0x2f0 ext4_mb_try_best_found+0x163/0x240 ext4_mb_regular_allocator+0x158/0x1550 ext4_mb_new_blocks+0x86a/0xe10 ext4_ext_map_blocks+0xb0c/0x13a0 ext4_map_blocks+0x2cd/0x8f0 ext4_iomap_begin+0x27b/0x400 iomap_iter+0x222/0x3d0 __iomap_dio_rw+0x243/0xcb0 iomap_dio_rw+0x16/0x80 =========================================================
A simple reproducer demonstrating the problem:
mkfs.ext4 -F /dev/sda -b 4096 100M mount /dev/sda /tmp/test fallocate -l1M /tmp/test/tmp fallocate -l10M /tmp/test/file fallocate -i -o 1M -l16777203M /tmp/test/file fsstress -d /tmp/test -l 0 -n 100000 -p 8 & sleep 10 && killall -9 fsstress rm -f /tmp/test/tmp xfs_io -c "open -ad /tmp/test/file" -c "pwrite -S 0xff 0 8192"
We simply refactor the logic for adjusting the best extent by adding a temporary ext4_free_extent ex and use extent_logical_end() to avoid overflow, which also simplifies the code.
Cc: stable@kernel.org # 6.4 Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()") Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Link: https://lore.kernel.org/r/20230724121059.11834-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu
Conflicts: fs/ext4/mballoc.c
Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/ext4/mballoc.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e07420c6ddee..901302735f1c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3683,8 +3683,11 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) return -ENOMEM;
if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { - int new_bex_start; - int new_bex_end; + struct ext4_free_extent ex = { + .fe_logical = ac->ac_g_ex.fe_logical, + .fe_len = ac->ac_g_ex.fe_len, + }; + loff_t orig_goal_end = extent_logical_end(sbi, &ex);
/* we can't allocate as much as normalizer wants. * so, found space must get proper lstart @@ -3703,29 +3706,23 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * still cover original start * 3. Else, keep the best ex at start of original request. */ - new_bex_end = ac->ac_g_ex.fe_logical + - EXT4_C2B(sbi, ac->ac_g_ex.fe_len); - new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (ac->ac_o_ex.fe_logical >= new_bex_start) - goto adjust_bex; + ex.fe_len = ac->ac_b_ex.fe_len;
- new_bex_start = ac->ac_g_ex.fe_logical; - new_bex_end = - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (ac->ac_o_ex.fe_logical < new_bex_end) + ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len); + if (ac->ac_o_ex.fe_logical >= ex.fe_logical) goto adjust_bex;
- new_bex_start = ac->ac_o_ex.fe_logical; - new_bex_end = - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); + ex.fe_logical = ac->ac_g_ex.fe_logical; + if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + goto adjust_bex;
+ ex.fe_logical = ac->ac_o_ex.fe_logical; adjust_bex: - ac->ac_b_ex.fe_logical = new_bex_start; + ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); - BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + - EXT4_C2B(sbi, ac->ac_g_ex.fe_len))); + BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); }
/* preallocation can change ac_b_ex, thus we store actually
commit bedc5d34632c21b5adb8ca7143d4c1f794507e4c upstream.
Let's say we want to allocate 2 blocks starting from 4294966386, after predicting the file size, start is aligned to 4294965248, len is changed to 2048, then end = start + size = 0x100000000. Since end is of type ext4_lblk_t, i.e. uint, end is truncated to 0.
This causes (pa->pa_lstart >= end) to always hold when checking if the current extent to be allocated crosses already preallocated blocks, so the resulting ac_g_ex may cross already preallocated blocks. Hence we convert the end type to loff_t and use pa_logical_end() to avoid overflow.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Link: https://lore.kernel.org/r/20230724121059.11834-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu
Conflicts: fs/ext4/mballoc.c
Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/ext4/mballoc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 901302735f1c..a0ec645bd7ed 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3094,8 +3094,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_super_block *es = sbi->s_es; int bsbits, max; - ext4_lblk_t end; - loff_t size, start_off; + loff_t size, start_off, end; loff_t orig_size __maybe_unused; ext4_lblk_t start; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); @@ -3203,7 +3202,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* check we don't cross already preallocated blocks */ rcu_read_lock(); list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { - ext4_lblk_t pa_end; + loff_t pa_end;
if (pa->pa_deleted) continue; @@ -3213,8 +3212,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, continue; }
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), - pa->pa_len); + pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa);
/* PA must not overlap original request */ BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || @@ -3243,12 +3241,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* XXX: extra loop to check we really don't overlap preallocations */ rcu_read_lock(); list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { - ext4_lblk_t pa_end; + loff_t pa_end;
spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0) { - pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), - pa->pa_len); + pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa); BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); } spin_unlock(&pa->pa_lock);
linux-stable-mirror@lists.linaro.org