Hi,
This patch set is intended for stable/linux-5.{15, 10}.y. The patches applied cleanly without deviations from the original upstream patches. The last patch is fixing the bug reported at [1]. The other three are prerequisites for the last commit. I tested the patches and I confirm that the reproducer no longer complains on linux-5.{15, 10}.y. Older LTS kernels have more dependencies, let's fix these until I sort out what else should be backported for the older LTS kernels.
[1] LINK: https://syzkaller.appspot.com/bug?id=5266d464285a03cee9dbfda7d2452a72c3c2ae7...
Cheers, ta
Lukas Czerner (1): ext4: block range must be validated before use in ext4_mb_clear_bb()
Ritesh Harjani (3): ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb() ext4: add ext4_sb_block_valid() refactored out of ext4_inode_block_valid() ext4: add strict range checks while freeing blocks
fs/ext4/block_validity.c | 26 +++-- fs/ext4/ext4.h | 3 + fs/ext4/mballoc.c | 205 +++++++++++++++++++++++---------------- 3 files changed, 139 insertions(+), 95 deletions(-)
From: Ritesh Harjani riteshh@linux.ibm.com
[ Upstream commit 8ac3939db99f99667b8eb670cf4baf292896e72d ]
ext4_free_blocks() function became too long and confusing, this patch just pulls out the ext4_mb_clear_bb() function logic from it which clears the block bitmap and frees it.
No functionality change in this patch
Signed-off-by: Ritesh Harjani riteshh@linux.ibm.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/22c30fbb26ba409cf8aa5f0c7912970272c459e8.164499261... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Tudor Ambarus tudor.ambarus@linaro.org --- fs/ext4/mballoc.c | 180 ++++++++++++++++++++++++++-------------------- 1 file changed, 102 insertions(+), 78 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0c7498a59943..4418d011a654 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5888,7 +5888,8 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, }
/** - * ext4_free_blocks() -- Free given blocks and update quota + * ext4_mb_clear_bb() -- helper function for freeing blocks. + * Used by ext4_free_blocks() * @handle: handle for this transaction * @inode: inode * @bh: optional buffer of the block to be freed @@ -5896,9 +5897,9 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block, - unsigned long count, int flags) +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -5915,80 +5916,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
- if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); - if (bh) { - if (block) - BUG_ON(block != bh->b_blocknr); - else - block = bh->b_blocknr; - } - - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_inode_block_valid(inode, block, count)) { - ext4_error(sb, "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); - goto error_return; - } - - ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, flags); - - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - BUG_ON(count > 1); - - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, bh, block); - } - - /* - * If the extent to be freed does not begin on a cluster - * boundary, we need to deal with partial clusters at the - * beginning and end of the extent. Normally we will free - * blocks at the beginning or the end unless we are explicitly - * requested to avoid doing so. - */ - overflow = EXT4_PBLK_COFF(sbi, block); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { - overflow = sbi->s_cluster_ratio - overflow; - block += overflow; - if (count > overflow) - count -= overflow; - else - return; - } else { - block -= overflow; - count += overflow; - } - } - overflow = EXT4_LBLK_COFF(sbi, count); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { - if (count > overflow) - count -= overflow; - else - return; - } else - count += sbi->s_cluster_ratio - overflow; - } - - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - int i; - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; - - for (i = 0; i < count; i++) { - cond_resched(); - if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); - ext4_forget(handle, is_metadata, inode, bh, block + i); - } - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -6156,6 +6083,103 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, return; }
+/** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed + * @flags: flags used by ext4_free_blocks + */ +void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) +{ + struct super_block *sb = inode->i_sb; + unsigned int overflow; + struct ext4_sb_info *sbi; + + sbi = EXT4_SB(sb); + + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } + + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { + ext4_error(sb, "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); + return; + } + + ext4_debug("freeing block %llu\n", block); + trace_ext4_free_blocks(inode, block, count, flags); + + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); + + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); + } + + /* + * If the extent to be freed does not begin on a cluster + * boundary, we need to deal with partial clusters at the + * beginning and end of the extent. Normally we will free + * blocks at the beginning or the end unless we are explicitly + * requested to avoid doing so. + */ + overflow = EXT4_PBLK_COFF(sbi, block); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { + overflow = sbi->s_cluster_ratio - overflow; + block += overflow; + if (count > overflow) + count -= overflow; + else + return; + } else { + block -= overflow; + count += overflow; + } + } + overflow = EXT4_LBLK_COFF(sbi, count); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { + if (count > overflow) + count -= overflow; + else + return; + } else + count += sbi->s_cluster_ratio - overflow; + } + + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; + + for (i = 0; i < count; i++) { + cond_resched(); + if (is_metadata) + bh = sb_find_get_block(inode->i_sb, block + i); + ext4_forget(handle, is_metadata, inode, bh, block + i); + } + } + + ext4_mb_clear_bb(handle, inode, block, count, flags); + return; +} + /** * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction
From: Ritesh Harjani riteshh@linux.ibm.com
[ Upstream commit 6bc6c2bdf1baca6522b8d9ba976257d722423085 ]
This API will be needed at places where we don't have an inode for e.g. while freeing blocks in ext4_group_add_blocks()
Suggested-by: Jan Kara jack@suse.cz Signed-off-by: Ritesh Harjani riteshh@linux.ibm.com Link: https://lore.kernel.org/r/dd34a236543ad5ae7123eeebe0cb69e6bdd44f34.164499261... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Tudor Ambarus tudor.ambarus@linaro.org --- fs/ext4/block_validity.c | 26 +++++++++++++++++--------- fs/ext4/ext4.h | 3 +++ 2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4666b55b736e..5504f72bbbbe 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); }
-/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with some other filesystem metadata blocks. - */ -int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, - unsigned int count) +int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_system_blocks *system_blks; struct ext4_system_zone *entry; struct rb_node *n; @@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; else { - ret = (entry->ino == inode->i_ino); + ret = 0; + if (inode) + ret = (entry->ino == inode->i_ino); break; } } @@ -338,6 +335,17 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, return ret; }
+/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. + */ +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, + unsigned int count) +{ + return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count); +} + int ext4_check_blockref(const char *function, unsigned int line, struct inode *inode, __le32 *p, unsigned int max) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bc209f303327..80f0942fa165 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3698,6 +3698,9 @@ extern int ext4_inode_block_valid(struct inode *inode, unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); +extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count); +
/* extents.c */ struct ext4_ext_path;
From: Ritesh Harjani riteshh@linux.ibm.com
[ Upstream commit a00b482b82fb098956a5bed22bd7873e56f152f1 ]
Currently ext4_mb_clear_bb() & ext4_group_add_blocks() only checks whether the given block ranges (which is to be freed) belongs to any FS metadata blocks or not, of the block's respective block group. But to detect any FS error early, it is better to add more strict checkings in those functions which checks whether the given blocks belongs to any critical FS metadata or not within system-zone.
Suggested-by: Jan Kara jack@suse.cz Signed-off-by: Ritesh Harjani riteshh@linux.ibm.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/ddd9143d064774e32d6364a99667817c6e8bfdc0.164499261... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Tudor Ambarus tudor.ambarus@linaro.org --- fs/ext4/mballoc.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4418d011a654..7b4359862a60 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5946,13 +5946,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, goto error_return; }
- if (in_range(ext4_block_bitmap(sb, gdp), block, count) || - in_range(ext4_inode_bitmap(sb, gdp), block, count) || - in_range(block, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group)) { - + if (!ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ @@ -6023,7 +6017,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%lu failed" + " group:%u block:%d count:%lu failed" " with %d", block_group, bit, count, err); } else @@ -6236,11 +6230,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, goto error_return; }
- if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { + if (!ext4_sb_block_valid(sb, NULL, block, count)) { ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count);
From: Lukas Czerner lczerner@redhat.com
[ Upstream commit 1e1c2b86ef86a8477fd9b9a4f48a6bfe235606f6 ]
Block range to free is validated in ext4_free_blocks() using ext4_inode_block_valid() and then it's passed to ext4_mb_clear_bb(). However in some situations on bigalloc file system the range might be adjusted after the validation in ext4_free_blocks() which can lead to troubles on corrupted file systems such as one found by syzkaller that resulted in the following BUG
kernel BUG at fs/ext4/ext4.h:3319! PREEMPT SMP NOPTI CPU: 28 PID: 4243 Comm: repro Kdump: loaded Not tainted 5.19.0-rc6+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014 RIP: 0010:ext4_free_blocks+0x95e/0xa90 Call Trace: <TASK> ? lock_timer_base+0x61/0x80 ? __es_remove_extent+0x5a/0x760 ? __mod_timer+0x256/0x380 ? ext4_ind_truncate_ensure_credits+0x90/0x220 ext4_clear_blocks+0x107/0x1b0 ext4_free_data+0x15b/0x170 ext4_ind_truncate+0x214/0x2c0 ? _raw_spin_unlock+0x15/0x30 ? ext4_discard_preallocations+0x15a/0x410 ? ext4_journal_check_start+0xe/0x90 ? __ext4_journal_start_sb+0x2f/0x110 ext4_truncate+0x1b5/0x460 ? __ext4_journal_start_sb+0x2f/0x110 ext4_evict_inode+0x2b4/0x6f0 evict+0xd0/0x1d0 ext4_enable_quotas+0x11f/0x1f0 ext4_orphan_cleanup+0x3de/0x430 ? proc_create_seq_private+0x43/0x50 ext4_fill_super+0x295f/0x3ae0 ? snprintf+0x39/0x40 ? sget_fc+0x19c/0x330 ? ext4_reconfigure+0x850/0x850 get_tree_bdev+0x16d/0x260 vfs_get_tree+0x25/0xb0 path_mount+0x431/0xa70 __x64_sys_mount+0xe2/0x120 do_syscall_64+0x5b/0x80 ? do_user_addr_fault+0x1e2/0x670 ? exc_page_fault+0x70/0x170 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fdf4e512ace
Fix it by making sure that the block range is properly validated before used every time it changes in ext4_free_blocks() or ext4_mb_clear_bb().
Link: https://syzkaller.appspot.com/bug?id=5266d464285a03cee9dbfda7d2452a72c3c2ae7... Reported-by: syzbot+15cd994e273307bf5cfa@syzkaller.appspotmail.com Signed-off-by: Lukas Czerner lczerner@redhat.com Cc: Tadeusz Struk tadeusz.struk@linaro.org Tested-by: Tadeusz Struk tadeusz.struk@linaro.org Link: https://lore.kernel.org/r/20220714165903.58260-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Tudor Ambarus tudor.ambarus@linaro.org --- fs/ext4/mballoc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7b4359862a60..e6718bfc6c55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5916,6 +5916,15 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
+ if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { + ext4_error(sb, "Freeing blocks in system zone - " + "Block = %llu, count = %lu", block, count); + /* err = 0. ext4_std_error should be a no op */ + goto error_return; + } + flags |= EXT4_FREE_BLOCKS_VALIDATED; + do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -5932,6 +5941,8 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, overflow = EXT4_C2B(sbi, bit) + count - EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; + /* The range changed so it's no longer validated */ + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; } count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); @@ -5946,7 +5957,8 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, goto error_return; }
- if (!ext4_inode_block_valid(inode, block, count)) { + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ @@ -6069,6 +6081,8 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, block += count; count = overflow; put_bh(bitmap_bh); + /* The range changed so it's no longer validated */ + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; goto do_more; } error_return: @@ -6115,6 +6129,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, "block = %llu, count = %lu", block, count); return; } + flags |= EXT4_FREE_BLOCKS_VALIDATED;
ext4_debug("freeing block %llu\n", block); trace_ext4_free_blocks(inode, block, count, flags); @@ -6146,6 +6161,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, block -= overflow; count += overflow; } + /* The range changed so it's no longer validated */ + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; } overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { @@ -6156,6 +6173,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, return; } else count += sbi->s_cluster_ratio - overflow; + /* The range changed so it's no longer validated */ + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; }
if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
On Thu, Mar 02, 2023 at 03:36:06PM +0000, Tudor Ambarus wrote:
Hi,
This patch set is intended for stable/linux-5.{15, 10}.y. The patches applied cleanly without deviations from the original upstream patches. The last patch is fixing the bug reported at [1]. The other three are prerequisites for the last commit. I tested the patches and I confirm that the reproducer no longer complains on linux-5.{15, 10}.y. Older LTS kernels have more dependencies, let's fix these until I sort out what else should be backported for the older LTS kernels.
[1] LINK: https://syzkaller.appspot.com/bug?id=5266d464285a03cee9dbfda7d2452a72c3c2ae7...
Cheers, ta
Lukas Czerner (1): ext4: block range must be validated before use in ext4_mb_clear_bb()
Ritesh Harjani (3): ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb() ext4: add ext4_sb_block_valid() refactored out of ext4_inode_block_valid() ext4: add strict range checks while freeing blocks
fs/ext4/block_validity.c | 26 +++-- fs/ext4/ext4.h | 3 + fs/ext4/mballoc.c | 205 +++++++++++++++++++++++---------------- 3 files changed, 139 insertions(+), 95 deletions(-)
Now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org