From: Eric Biggers <ebiggers(a)google.com>
ext4 didn't properly clean up if verity failed to be enabled on a file:
- It left verity metadata (pages past EOF) in the page cache, which
would be exposed to userspace if the file was later extended.
- It didn't truncate the verity metadata at all (either from cache or
from disk) if an error occurred while setting the verity bit.
Fix these bugs by adding a call to truncate_inode_pages() and ensuring
that we truncate the verity metadata (both from cache and from disk) in
all error paths. Also rework the code to cleanly separate the success
path from the error paths, which makes it much easier to understand.
Reported-by: Yunlei He <heyunlei(a)hihonor.com>
Fixes: c93d8f885809 ("ext4: add basic fs-verity support")
Cc: <stable(a)vger.kernel.org> # v5.4+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/ext4/verity.c | 90 ++++++++++++++++++++++++++++++------------------
1 file changed, 56 insertions(+), 34 deletions(-)
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 5b7ba8f711538..acb12441c549b 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -201,55 +201,77 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
struct inode *inode = file_inode(filp);
const int credits = 2; /* superblock and inode for ext4_orphan_del() */
handle_t *handle;
+ struct ext4_iloc iloc;
int err = 0;
- int err2;
- if (desc != NULL) {
- /* Succeeded; write the verity descriptor. */
- err = ext4_write_verity_descriptor(inode, desc, desc_size,
- merkle_tree_size);
-
- /* Write all pages before clearing VERITY_IN_PROGRESS. */
- if (!err)
- err = filemap_write_and_wait(inode->i_mapping);
- }
+ /*
+ * If an error already occurred (which fs/verity/ signals by passing
+ * desc == NULL), then only clean-up is needed.
+ */
+ if (desc == NULL)
+ goto cleanup;
- /* If we failed, truncate anything we wrote past i_size. */
- if (desc == NULL || err)
- ext4_truncate(inode);
+ /* Append the verity descriptor. */
+ err = ext4_write_verity_descriptor(inode, desc, desc_size,
+ merkle_tree_size);
+ if (err)
+ goto cleanup;
/*
- * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
- * deleting the inode from the orphan list, even if something failed.
- * If everything succeeded, we'll also set the verity bit in the same
- * transaction.
+ * Write all pages (both data and verity metadata). Note that this must
+ * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
+ * beyond i_size won't be written properly. For crash consistency, this
+ * also must happen before the verity inode flag gets persisted.
*/
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto cleanup;
- ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ /*
+ * Finally, set the verity inode flag and remove the inode from the
+ * orphan list (in a single transaction).
+ */
handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
if (IS_ERR(handle)) {
- ext4_orphan_del(NULL, inode);
- return PTR_ERR(handle);
+ err = PTR_ERR(handle);
+ goto cleanup;
}
- err2 = ext4_orphan_del(handle, inode);
- if (err2)
- goto out_stop;
+ err = ext4_orphan_del(handle, inode);
+ if (err)
+ goto stop_and_cleanup;
- if (desc != NULL && !err) {
- struct ext4_iloc iloc;
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto stop_and_cleanup;
+
+ ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
+ ext4_set_inode_flags(inode, false);
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+ if (err)
+ goto stop_and_cleanup;
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto out_stop;
- ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
- ext4_set_inode_flags(inode, false);
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- }
-out_stop:
ext4_journal_stop(handle);
- return err ?: err2;
+
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ return 0;
+
+
+stop_and_cleanup:
+ ext4_journal_stop(handle);
+cleanup:
+ /*
+ * Verity failed to be enabled, so clean up by truncating any verity
+ * metadata that was written beyond i_size (both from cache and from
+ * disk), removing the inode from the orphan list (if it wasn't done
+ * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
+ */
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ext4_truncate(inode);
+ ext4_orphan_del(NULL, inode);
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ return err;
}
static int ext4_get_verity_descriptor_location(struct inode *inode,
--
2.30.1
To allow performing tag checks on page_alloc addresses obtained via
page_address(), tag-based KASAN modes store tags for page_alloc
allocations in page->flags.
Currently, the default tag value stored in page->flags is 0x00.
Therefore, page_address() returns a 0x00ffff... address for pages
that were not allocated via page_alloc.
This might cause problems. A particular case we encountered is a conflict
with KFENCE. If a KFENCE-allocated slab object is being freed via
kfree(page_address(page) + offset), the address passed to kfree() will
get tagged with 0x00 (as slab pages keep the default per-page tags).
This leads to is_kfence_address() check failing, and a KFENCE object
ending up in normal slab freelist, which causes memory corruptions.
This patch changes the way KASAN stores tag in page-flags: they are now
stored xor'ed with 0xff. This way, KASAN doesn't need to initialize
per-page flags for every created page, which might be slow.
With this change, page_address() returns natively-tagged (with 0xff)
pointers for pages that didn't have tags set explicitly.
This patch fixes the encountered conflict with KFENCE and prevents more
similar issues that can occur in the future.
Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrey Konovalov <andreyknvl(a)google.com>
---
include/linux/mm.h | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 77e64e3eac80..c45c28f094a7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1440,16 +1440,28 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+/*
+ * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid
+ * setting tags for all pages to native kernel tag value 0xff, as the default
+ * value 0x00 maps to 0xff.
+ */
+
static inline u8 page_kasan_tag(const struct page *page)
{
- if (kasan_enabled())
- return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
- return 0xff;
+ u8 tag = 0xff;
+
+ if (kasan_enabled()) {
+ tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
+ tag ^= 0xff;
+ }
+
+ return tag;
}
static inline void page_kasan_tag_set(struct page *page, u8 tag)
{
if (kasan_enabled()) {
+ tag ^= 0xff;
page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
}
--
2.31.0.rc2.261.g7f71774620-goog