If the starting position of our insert range happens to be in the hole
between the two ext4_extent_idx, because the lblk of the ext4_extent in
the previous ext4_extent_idx is always less than the start, which leads
to the "extent" variable access across the boundary, the following UAF is
triggered:
==================================================================
BUG: KASAN: use-after-free in ext4_ext_shift_extents+0x257/0x790
Read of size 4 at addr ffff88819807a008 by task fallocate/8010
CPU: 3 PID: 8010 Comm: fallocate Tainted: G E 5.10.0+ #492
Call Trace:
dump_stack+0x7d/0xa3
print_address_description.constprop.0+0x1e/0x220
kasan_report.cold+0x67/0x7f
ext4_ext_shift_extents+0x257/0x790
ext4_insert_range+0x5b6/0x700
ext4_fallocate+0x39e/0x3d0
vfs_fallocate+0x26f/0x470
ksys_fallocate+0x3a/0x70
__x64_sys_fallocate+0x4f/0x60
do_syscall_64+0x33/0x40
entry_SYSCALL_64_after_hwframe+0x44/0xa9
==================================================================
For right shifts, we can divide them into the following situations:
1. When the first ee_block of ext4_extent_idx is greater than or equal to
start, make right shifts directly from the first ee_block.
1) If it is greater than start, we need to continue searching in the
previous ext4_extent_idx.
2) If it is equal to start, we can exit the loop (iterator=NULL).
2. When the first ee_block of ext4_extent_idx is less than start, then
traverse from the last extent to find the first extent whose ee_block
is less than start.
1) If extent is still the last extent after traversal, it means that
the last ee_block of ext4_extent_idx is less than start, that is,
start is located in the hole between idx and (idx+1), so we can
exit the loop directly (break) without right shifts.
2) Otherwise, make right shifts at the corresponding position of the
found extent, and then exit the loop (iterator=NULL).
Fixes: 331573febb6a ("ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate")
Cc: stable(a)vger.kernel.org # v4.2+
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
V1->V2:
Initialize "ret" after the "again:" label to avoid return value mismatch.
Refactoring reduces cycles and makes code more readable.
fs/ext4/extents.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c148bb97b527..39c9f87de0be 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5179,6 +5179,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* and it is decreased till we reach start.
*/
again:
+ ret = 0;
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
@@ -5222,14 +5223,21 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- if (le32_to_cpu(extent->ee_block) > 0)
+ if (le32_to_cpu(extent->ee_block) > start)
*iterator = le32_to_cpu(extent->ee_block) - 1;
- else
- /* Beginning is reached, end of the loop */
+ else if (le32_to_cpu(extent->ee_block) == start)
iterator = NULL;
- /* Update path extent in case we need to stop */
- while (le32_to_cpu(extent->ee_block) < start)
+ else {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ while (le32_to_cpu(extent->ee_block) >= start)
+ extent--;
+
+ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+ break;
+
extent++;
+ iterator = NULL;
+ }
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
--
2.31.1
From: Eric Biggers <ebiggers(a)google.com>
Due to several different off-by-one errors, or perhaps due to a late
change in design that wasn't fully reflected in the code that was
actually merged, there are several very strange constraints on how
fast-commit blocks are filled with tlv entries:
- tlvs must start at least 10 bytes before the end of the block, even
though the minimum tlv length is 8. Otherwise, the replay code will
ignore them. (BUG: ext4_fc_reserve_space() could violate this
requirement if called with a len of blocksize - 9 or blocksize - 8.
Fortunately, this doesn't seem to happen currently.)
- tlvs must end at least 1 byte before the end of the block. Otherwise
the replay code will consider them to be invalid. This quirk
contributed to a bug (fixed by an earlier commit) where uninitialized
memory was being leaked to disk in the last byte of blocks.
Also, strangely these constraints don't apply to the replay code in
e2fsprogs, which will accept any tlvs in the blocks (with no bounds
checks at all, but that is a separate issue...).
Given that this all seems to be a bug, let's fix it by just filling
blocks with tlv entries in the natural way.
Note that old kernels will be unable to replay fast-commit journals
created by kernels that have this commit.
Fixes: aa75f4d3daae ("ext4: main fast-commit commit path")
Cc: <stable(a)vger.kernel.org> # v5.10+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/ext4/fast_commit.c | 66 +++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 892fa7c7a768b..7ed71c652f67f 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -714,43 +714,43 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
struct buffer_head *bh;
int bsize = sbi->s_journal->j_blocksize;
int ret, off = sbi->s_fc_bytes % bsize;
- int pad_len;
+ int remaining;
u8 *dst;
/*
- * After allocating len, we should have space at least for a 0 byte
- * padding.
+ * If 'len' is too long to fit in any block alongside a PAD tlv, then we
+ * cannot fulfill the request.
*/
- if (len + EXT4_FC_TAG_BASE_LEN > bsize)
+ if (len > bsize - EXT4_FC_TAG_BASE_LEN)
return NULL;
- if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
- /*
- * Only allocate from current buffer if we have enough space for
- * this request AND we have space to add a zero byte padding.
- */
- if (!sbi->s_fc_bh) {
- ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
- if (ret)
- return NULL;
- sbi->s_fc_bh = bh;
- }
- sbi->s_fc_bytes += len;
- return sbi->s_fc_bh->b_data + off;
+ if (!sbi->s_fc_bh) {
+ ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
+ if (ret)
+ return NULL;
+ sbi->s_fc_bh = bh;
}
- /* Need to add PAD tag */
dst = sbi->s_fc_bh->b_data + off;
+
+ /*
+ * Allocate the bytes in the current block if we can do so while still
+ * leaving enough space for a PAD tlv.
+ */
+ remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
+ if (len <= remaining) {
+ sbi->s_fc_bytes += len;
+ return dst;
+ }
+
+ /*
+ * Else, terminate the current block with a PAD tlv, then allocate a new
+ * block and allocate the bytes at the start of that new block.
+ */
+
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
- pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
- tl.fc_len = cpu_to_le16(pad_len);
+ tl.fc_len = cpu_to_le16(remaining);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
- dst += EXT4_FC_TAG_BASE_LEN;
- if (pad_len > 0) {
- ext4_fc_memzero(sb, dst, pad_len, crc);
- dst += pad_len;
- }
- /* Don't leak uninitialized memory in the unused last byte. */
- *dst = 0;
+ ext4_fc_memzero(sb, dst + EXT4_FC_TAG_BASE_LEN, remaining, crc);
ext4_fc_submit_bh(sb, false);
@@ -758,7 +758,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
if (ret)
return NULL;
sbi->s_fc_bh = bh;
- sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
+ sbi->s_fc_bytes += bsize - off + len;
return sbi->s_fc_bh->b_data;
}
@@ -789,7 +789,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
off = sbi->s_fc_bytes % bsize;
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
- tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
+ tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail));
sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
@@ -2056,7 +2056,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
state = &sbi->s_fc_replay_state;
start = (u8 *)bh->b_data;
- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
+ end = start + journal->j_blocksize;
if (state->fc_replay_expected_off == 0) {
state->fc_cur_tag = 0;
@@ -2077,7 +2077,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
}
state->fc_replay_expected_off++;
- for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
@@ -2195,9 +2195,9 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
#endif
start = (u8 *)bh->b_data;
- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
+ end = start + journal->j_blocksize;
- for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
--
2.38.1