Hole puching currently evicts pages from page cache and then goes on to remove blocks from the inode. This happens under both i_mmap_sem and i_rwsem held exclusively which provides appropriate serialization with racing page faults. However there is currently nothing that prevents ordinary read(2) from racing with the hole punch and instantiating page cache page after hole punching has evicted page cache but before it has removed blocks from the inode. This page cache page will be mapping soon to be freed block and that can lead to returning stale data to userspace or even filesystem corruption.
Fix the problem by protecting reads as well as readahead requests with i_mmap_sem.
CC: stable@vger.kernel.org Reported-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz --- fs/ext4/file.c | 16 ++++++++++++++++ fs/ext4/inode.c | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 349b27f0dda0..d66f7c08b123 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -30,6 +30,7 @@ #include <linux/uio.h> #include <linux/mman.h> #include <linux/backing-dev.h> +#include <linux/fadvise.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -131,6 +132,20 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); }
+static int ext4_fadvise(struct file *filp, loff_t start, loff_t end, int advice) +{ + struct inode *inode = file_inode(filp); + int ret; + + /* Readahead needs protection from hole punching and similar ops */ + if (advice == POSIX_FADV_WILLNEED) + down_read(&EXT4_I(inode)->i_mmap_sem); + ret = generic_fadvise(filp, start, end, advice); + if (advice == POSIX_FADV_WILLNEED) + up_read(&EXT4_I(inode)->i_mmap_sem); + return ret; +} + /* * Called when an inode is released. Note that this is different * from ext4_file_open: open gets called at every open, but release @@ -911,6 +926,7 @@ const struct file_operations ext4_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, + .fadvise = ext4_fadvise, };
const struct inode_operations ext4_file_inode_operations = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c173c8405856..a01569f2fa49 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3646,9 +3646,28 @@ static int ext4_iomap_swap_activate(struct swap_info_struct *sis, &ext4_iomap_report_ops); }
+static int ext4_fill_pages(struct kiocb *iocb, size_t len, bool partial_page, + struct page **pagep, unsigned int nr_pages) +{ + struct ext4_inode_info *ei = EXT4_I(iocb->ki_filp->f_mapping->host); + int ret; + + /* + * Protect adding of pages into page cache against hole punching and + * other cache manipulation functions so that we don't expose + * potentially stale user data. + */ + down_read(&ei->i_mmap_sem); + ret = generic_file_buffered_read_get_pages(iocb, len, partial_page, + pagep, nr_pages); + up_read(&ei->i_mmap_sem); + return ret; +} + static const struct address_space_operations ext4_aops = { .readpage = ext4_readpage, .readahead = ext4_readahead, + .fill_pages = ext4_fill_pages, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_write_begin, @@ -3667,6 +3686,7 @@ static const struct address_space_operations ext4_aops = { static const struct address_space_operations ext4_journalled_aops = { .readpage = ext4_readpage, .readahead = ext4_readahead, + .fill_pages = ext4_fill_pages, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_write_begin, @@ -3684,6 +3704,7 @@ static const struct address_space_operations ext4_journalled_aops = { static const struct address_space_operations ext4_da_aops = { .readpage = ext4_readpage, .readahead = ext4_readahead, + .fill_pages = ext4_fill_pages, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_da_write_begin,