Each ondisk inode should be aligned with inode slot boundary (32-byte alignment) because of nid calculation formula, so all compact inodes (32 byte) cannot across page boundary. However, extended inode is now 64-byte form, which can across page boundary in principle if the location is specified on purpose, although it's hard to be generated by mkfs due to the allocation policy and rarely used by Android use case now mainly for > 4GiB files.
For now, only two fields `i_ctime_nsec` and `i_nlink' couldn't be read from disk properly and cause out-of-bound memory read with random value.
Let's fix now.
Fixes: 431339ba9042 ("staging: erofs: add inode operations") Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Gao Xiang hsiangkao@redhat.com --- The attachment is a designed image for reference.
fs/erofs/inode.c | 121 +++++++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 42 deletions(-)
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 7dd4bbe9674f..586f9d0a8b2f 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -8,31 +8,80 @@
#include <trace/events/erofs.h>
-/* no locking */ -static int erofs_read_inode(struct inode *inode, void *data) +/* + * if inode is successfully read, return its inode page (or sometimes + * the inode payload page if it's an extended inode) in order to fill + * inline data if possible. + */ +static struct page *erofs_read_inode(struct inode *inode, + unsigned int *ofs) { + struct super_block *sb = inode->i_sb; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_inode *vi = EROFS_I(inode); - struct erofs_inode_compact *dic = data; - struct erofs_inode_extended *die; + const erofs_off_t inode_loc = iloc(sbi, vi->nid); + + erofs_blk_t blkaddr, nblks = 0; + struct page *page; + struct erofs_inode_compact *dic; + struct erofs_inode_extended *die, *copied = NULL; + unsigned int ifmt; + int err;
- const unsigned int ifmt = le16_to_cpu(dic->i_format); - struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); - erofs_blk_t nblks = 0; + blkaddr = erofs_blknr(inode_loc); + *ofs = erofs_blkoff(inode_loc);
- vi->datalayout = erofs_inode_datalayout(ifmt); + erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", + __func__, vi->nid, *ofs, blkaddr); + + page = erofs_get_meta_page(sb, blkaddr); + if (IS_ERR(page)) { + erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", + vi->nid, PTR_ERR(page)); + return page; + }
+ dic = page_address(page) + *ofs; + ifmt = le16_to_cpu(dic->i_format); + + vi->datalayout = erofs_inode_datalayout(ifmt); if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", vi->datalayout, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; }
switch (erofs_inode_version(ifmt)) { case EROFS_INODE_LAYOUT_EXTENDED: - die = data; - vi->inode_isize = sizeof(struct erofs_inode_extended); + /* check if the inode acrosses page boundary */ + if (*ofs + vi->inode_isize <= PAGE_SIZE) { + *ofs += vi->inode_isize; + die = (struct erofs_inode_extended *)dic; + } else { + const unsigned int gotten = PAGE_SIZE - *ofs; + + copied = kmalloc(vi->inode_isize, GFP_NOFS); + if (!copied) { + err = -ENOMEM; + goto err_out; + } + memcpy(copied, dic, gotten); + unlock_page(page); + put_page(page); + + page = erofs_get_meta_page(sb, blkaddr + 1); + if (IS_ERR(page)) { + erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld", + vi->nid, PTR_ERR(page)); + kfree(copied); + return page; + } + *ofs = vi->inode_isize - gotten; + memcpy((u8 *)copied + gotten, page_address(page), *ofs); + die = copied; + } vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
inode->i_mode = le16_to_cpu(die->i_mode); @@ -69,9 +118,12 @@ static int erofs_read_inode(struct inode *inode, void *data) /* total blocks for compressed files */ if (erofs_inode_is_data_compressed(vi->datalayout)) nblks = le32_to_cpu(die->i_u.compressed_blocks); + + kfree(copied); break; case EROFS_INODE_LAYOUT_COMPACT: vi->inode_isize = sizeof(struct erofs_inode_compact); + *ofs += vi->inode_isize; vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
inode->i_mode = le16_to_cpu(dic->i_mode); @@ -111,8 +163,8 @@ static int erofs_read_inode(struct inode *inode, void *data) erofs_err(inode->i_sb, "unsupported on-disk inode version %u of nid %llu", erofs_inode_version(ifmt), vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; }
if (!nblks) @@ -120,13 +172,18 @@ static int erofs_read_inode(struct inode *inode, void *data) inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; else inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; - return 0; + return page;
bogusimode: erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid); + err = -EFSCORRUPTED; +err_out: DBG_BUGON(1); - return -EFSCORRUPTED; + kfree(copied); + unlock_page(page); + put_page(page); + return ERR_PTR(err); }
static int erofs_fill_symlink(struct inode *inode, void *data, @@ -146,7 +203,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, if (!lnk) return -ENOMEM;
- m_pofs += vi->inode_isize + vi->xattr_isize; + m_pofs += vi->xattr_isize; /* inline symlink data shouldn't cross page boundary as well */ if (m_pofs + inode->i_size > PAGE_SIZE) { kfree(lnk); @@ -167,37 +224,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
static int erofs_fill_inode(struct inode *inode, int isdir) { - struct super_block *sb = inode->i_sb; struct erofs_inode *vi = EROFS_I(inode); struct page *page; - void *data; - int err; - erofs_blk_t blkaddr; unsigned int ofs; - erofs_off_t inode_loc; + int err = 0;
trace_erofs_fill_inode(inode, isdir); - inode_loc = iloc(EROFS_SB(sb), vi->nid); - blkaddr = erofs_blknr(inode_loc); - ofs = erofs_blkoff(inode_loc); - - erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", - __func__, vi->nid, ofs, blkaddr);
- page = erofs_get_meta_page(sb, blkaddr); - - if (IS_ERR(page)) { - erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(page)); + /* read inode base data from disk */ + page = erofs_read_inode(inode, &ofs); + if (IS_ERR(page)) return PTR_ERR(page); - } - - DBG_BUGON(!PageUptodate(page)); - data = page_address(page); - - err = erofs_read_inode(inode, data + ofs); - if (err) - goto out_unlock;
/* setup the new inode */ switch (inode->i_mode & S_IFMT) { @@ -210,7 +247,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) inode->i_fop = &erofs_dir_fops; break; case S_IFLNK: - err = erofs_fill_symlink(inode, data, ofs); + err = erofs_fill_symlink(inode, page_address(page), ofs); if (err) goto out_unlock; inode_nohighmem(inode);
On 2020/7/30 1:58, Gao Xiang wrote:
Each ondisk inode should be aligned with inode slot boundary (32-byte alignment) because of nid calculation formula, so all compact inodes (32 byte) cannot across page boundary. However, extended inode is now 64-byte form, which can across page boundary in principle if the location is specified on purpose, although it's hard to be generated by mkfs due to the allocation policy and rarely used by Android use case now mainly for > 4GiB files.
For now, only two fields `i_ctime_nsec` and `i_nlink' couldn't be read from disk properly and cause out-of-bound memory read with random value.
Let's fix now.
Fixes: 431339ba9042 ("staging: erofs: add inode operations") Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Gao Xiang hsiangkao@redhat.com
Reviewed-by: Chao Yu yuchao0@huawei.com
Thanks,
linux-stable-mirror@lists.linaro.org