From: Zhang Yi yi.zhang@huawei.com
commit 8dd27fecede55e8a4e67eef2878040ecad0f0d33 upstream.
After commit 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()"), we can check out the overlapping extent entry in leaf extent blocks. But the out-of-order extent entry in index extent blocks could also trigger bad things if the filesystem is inconsistent. So this patch add a check to figure out the out-of-order index extents and return error.
[Added pblk argument to ext4_valid_extent_entries because pblk is updated in the case of overlapping extents. This argument was added in commit 54d3adbc29f0c7c53890da1683e629cd220d7201.]
Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Theodore Ts'o tytso@mit.edu Link: https://lore.kernel.org/r/20210908120850.4012324-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Leah Rumancik leah.rumancik@gmail.com --- fs/ext4/extents.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae73e6793683..4f8736b7e497 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -390,9 +390,12 @@ static int ext4_valid_extent_idx(struct inode *inode,
static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - int depth) + ext4_fsblk_t *pblk, int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1;
@@ -403,32 +406,36 @@ static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; ext4_fsblk_t pblock = 0; - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0;
/* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { pblock = ext4_ext_pblock(ext); es->s_last_error_block = cpu_to_le64(pblock); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; @@ -462,7 +469,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, depth)) { + if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; }
From: Zhang Yi yi.zhang@huawei.com
commit 9c6e071913792d80894cd0be98cc3c4b770e26d3 upstream.
Now that we can check out overlapping extents in leaf block and out-of-order index extents in index block. But the .ee_block in the first extent of one leaf block should equal to the .ei_block in it's parent index extent entry. This patch add a check to verify such inconsistent between the index and leaf block.
Signed-off-by: Zhang Yi yi.zhang@huawei.com Link: https://lore.kernel.org/r/20210908120850.4012324-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Leah Rumancik leah.rumancik@gmail.com --- fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 23 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4f8736b7e497..1ec6d0ccf5ba 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -390,7 +390,8 @@ static int ext4_valid_extent_idx(struct inode *inode,
static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - ext4_fsblk_t *pblk, int depth) + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; ext4_lblk_t lblock = 0; @@ -406,6 +407,14 @@ static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; ext4_fsblk_t pblock = 0; + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; @@ -423,6 +432,14 @@ static int ext4_valid_extent_entries(struct inode *inode, } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; @@ -443,7 +460,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -469,7 +486,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -498,7 +515,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, }
#define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
int ext4_ext_check_inode(struct inode *inode) { @@ -531,12 +548,14 @@ static void ext4_cache_extents(struct inode *inode,
static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; + ext4_fsblk_t pblk;
+ pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -552,8 +571,8 @@ __read_extent_tree_block(const char *function, unsigned int line, if (!ext4_has_feature_journal(inode->i_sb) || (inode->i_ino != le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; } @@ -572,8 +591,8 @@ __read_extent_tree_block(const char *function, unsigned int line,
}
-#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags))
/* @@ -620,8 +639,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -924,8 +942,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL;
- bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1524,7 +1541,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len;
@@ -1591,20 +1607,17 @@ static int ext4_ext_search_right(struct inode *inode, * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); }
- bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -3126,9 +3139,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ext_debug("move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh);
From: Zhang Yi yi.zhang@huawei.com
commit 0f2f87d51aebcf71a709b52f661d681594c7dffa upstream.
In the most error path of current extents updating operations are not roll back partial updates properly when some bad things happens(.e.g in ext4_ext_insert_extent()). So we may get an inconsistent extents tree if journal has been aborted due to IO error, which may probability lead to BUGON later when we accessing these extent entries in errors=continue mode. This patch drop extent buffer's verify flag before updatng the contents in ext4_ext_get_access(), and reset it after updating in __ext4_ext_dirty(). After this patch we could force to check the extent buffer if extents tree updating was break off, make sure the extents are consistent.
Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Theodore Ts'o tytso@mit.edu Link: https://lore.kernel.org/r/20210908120850.4012324-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Leah Rumancik leah.rumancik@gmail.com --- fs/ext4/extents.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1ec6d0ccf5ba..f1bbce4350c4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -133,14 +133,25 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, path->p_bh); + err = ext4_journal_get_write_access(handle, path->p_bh); + + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. + */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; }
/* @@ -160,6 +171,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode);
On Thu, Feb 17, 2022 at 02:59:12PM -0800, Leah Rumancik wrote:
From: Zhang Yi yi.zhang@huawei.com
commit 8dd27fecede55e8a4e67eef2878040ecad0f0d33 upstream.
After commit 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()"), we can check out the overlapping extent entry in leaf extent blocks. But the out-of-order extent entry in index extent blocks could also trigger bad things if the filesystem is inconsistent. So this patch add a check to figure out the out-of-order index extents and return error.
[Added pblk argument to ext4_valid_extent_entries because pblk is updated in the case of overlapping extents. This argument was added in commit 54d3adbc29f0c7c53890da1683e629cd220d7201.]
Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Theodore Ts'o tytso@mit.edu Link: https://lore.kernel.org/r/20210908120850.4012324-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Leah Rumancik leah.rumancik@gmail.com
fs/ext4/extents.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-)
All now queued up, thanks.
greg k-h
Leah, thanks for doing the backport to 5.4! And Greg, thanks for queuing them up.
I note that if we first cherry pick into 4.19.y a fix from 5.2 that probably should have been cc'ed to stable to begin with:
0a944e8a6c66 ("ext4: don't perform block validity checks on the journal inode")
Leah's three backports to 5.4 will then apply to 4.19 LTS; I've run regression tests with the cherry-pick of 0a944e8a6c66 and Leah's three backports applied to 4.19.230, and the resulting kernel looks fine and prevents a kernel crash when running ext4/054.
Greg, would you prefer that I send the patches for 4.19.y, or do you have what you need to do the cherry pick (all of the cherry picks are clean, and didn't require any manual resolution)?
Thanks!
- Ted
On Fri, Feb 18, 2022 at 02:57:22PM -0500, Theodore Ts'o wrote:
Leah, thanks for doing the backport to 5.4! And Greg, thanks for queuing them up.
I note that if we first cherry pick into 4.19.y a fix from 5.2 that probably should have been cc'ed to stable to begin with:
0a944e8a6c66 ("ext4: don't perform block validity checks on the journal inode")
This commit is already in the following releases: 3.16.85 4.4.221 4.4.263 4.9.221 4.9.263 4.14.178 4.14.227 4.19.73 4.19.182 5.2 5.7.18 5.8.4
Leah's three backports to 5.4 will then apply to 4.19 LTS; I've run regression tests with the cherry-pick of 0a944e8a6c66 and Leah's three backports applied to 4.19.230, and the resulting kernel looks fine and prevents a kernel crash when running ext4/054.
Greg, would you prefer that I send the patches for 4.19.y, or do you have what you need to do the cherry pick (all of the cherry picks are clean, and didn't require any manual resolution)?
The second commit does not apply cleanly, so I would need working backported ones to get this right. I have queued up the first one now already.
thanks,
greg k-h
linux-stable-mirror@lists.linaro.org