commit 003fb0a51162d940f25fc35e70b0996a12c9e08a upstream.
Requests to the mmc layer usually come through a block device IO.
The exceptions are the ioctl interface, RPMB chardev ioctl
and debugfs, which issue their own blk_mq requests through
blk_execute_rq and do not query the BLK_STS error but the
mmcblk-internal drv_op_result. This patch ensures that drv_op_result
defaults to an error and has to be overwritten by the operation
to be considered successful.
The behavior leads to a bug where the request never propagates
the error, e.g. by directly erroring out at mmc_blk_mq_issue_rq if
mmc_blk_part_switch fails. The ioctl caller of the rpmb chardev then
can never see an error (BLK_STS_IOERR, but drv_op_result is unchanged)
and thus may assume that their call executed successfully when it did not.
While always checking the blk_execute_rq return value would be
advised, let's eliminate the error by always setting
drv_op_result to -EIO, to be overwritten on success (or by another error).
Fixes: 614f0388f580 ("mmc: block: move single ioctl() commands to block requests")
Signed-off-by: Christian Loehle <cloehle(a)hyperstone.com>
---
drivers/mmc/core/block.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 79e5acc6e964..a6228bfdf3ea 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -243,6 +243,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
goto out_put;
}
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
blk_execute_rq(mq->queue, NULL, req, 0);
ret = req_to_mmc_queue_req(req)->drv_op_result;
blk_put_request(req);
@@ -671,6 +672,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
idatas[0] = idata;
req_to_mmc_queue_req(req)->drv_op =
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = idatas;
req_to_mmc_queue_req(req)->ioc_count = 1;
blk_execute_rq(mq->queue, NULL, req, 0);
@@ -741,6 +743,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
}
req_to_mmc_queue_req(req)->drv_op =
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = idata;
req_to_mmc_queue_req(req)->ioc_count = num_of_cmds;
blk_execute_rq(mq->queue, NULL, req, 0);
@@ -2590,6 +2593,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
if (IS_ERR(req))
return PTR_ERR(req);
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
blk_execute_rq(mq->queue, NULL, req, 0);
ret = req_to_mmc_queue_req(req)->drv_op_result;
if (ret >= 0) {
@@ -2628,6 +2632,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
goto out_free;
}
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
blk_execute_rq(mq->queue, NULL, req, 0);
err = req_to_mmc_queue_req(req)->drv_op_result;
--
2.37.3
Syzbot reported a BUG_ON:
==================================================================
EXT4-fs (loop0): mounted filesystem without journal. Quota mode: none.
EXT4-fs error (device loop0): ext4_mb_generate_buddy:1098: group 0, block
bitmap and bg descriptor inconsistent: 25 vs 150994969 free clusters
------------[ cut here ]------------
kernel BUG at fs/ext4/ext4_jbd2.c:53!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 494 Comm: syz-executor.0 6.1.0-rc7-syzkaller-ga4412fdd49dc #0
RIP: 0010:__ext4_journal_stop+0x1b3/0x1c0
[...]
Call Trace:
ext4_write_inline_data_end+0xa39/0xdf0
ext4_da_write_end+0x1e2/0x950
generic_perform_write+0x401/0x5f0
ext4_buffered_write_iter+0x35f/0x640
ext4_file_write_iter+0x198/0x1cd0
vfs_write+0x8b5/0xef0
[...]
==================================================================
The above BUG_ON is triggered by the following race:
cpu1 cpu2
________________________|________________________
ksys_write
vfs_write
new_sync_write
ext4_file_write_iter
ext4_buffered_write_iter
generic_perform_write
ext4_da_write_begin
do_fault
do_page_mkwrite
ext4_page_mkwrite
ext4_convert_inline_data
ext4_convert_inline_data_nolock
ext4_destroy_inline_data_nolock
//clear EXT4_STATE_MAY_INLINE_DATA
ext4_map_blocks --> return error
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)
ext4_block_write_begin
ext4_restore_inline_data
// set EXT4_STATE_MAY_INLINE_DATA
ext4_da_write_end
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)
ext4_write_inline_data_end
handle=NULL
ext4_journal_stop(handle)
__ext4_journal_stop
ext4_put_nojournal(handle)
ref_cnt = (unsigned long)handle
BUG_ON(ref_cnt == 0) ---> BUG_ON
The root cause of this problem is that the ext4_convert_inline_data() in
ext4_page_mkwrite() does not grab i_rwsem, so it may race with
ext4_buffered_write_iter() and cause the write_begin() and write_end()
functions to be inconsistent and trigger BUG_ON.
To solve the above issue, we cannot add inode_lock directly to
ext4_page_mkwrite(), which would not only cause performance degradation but
also ABBA deadlock (see Link). Hence we move ext4_convert_inline_data() to
ext4_file_mmap() and perform the conversion, under the lock, only when
inline_data is enabled and a writeable file is mmapped in shared mode, which
avoids the above problems.
Link: https://lore.kernel.org/r/20230530102804.6t7np7om6tczscuo@quack3/
Reported-by: Jun Nie <jun.nie(a)linaro.org>
Closes: https://lore.kernel.org/lkml/63903521.5040307@huawei.com/t/
Reported-by: syzbot+a158d886ca08a3fecca4(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?id=899b37f20ce4072bcdfecfe1647b39602e956e…
Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map")
CC: stable(a)vger.kernel.org # 4.12+
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
fs/ext4/file.c | 24 +++++++++++++++++++++++-
fs/ext4/inode.c | 4 ----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7da..9df82d72eb90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -795,7 +795,8 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct dax_device *dax_dev = sbi->s_daxdev;
if (unlikely(ext4_forced_shutdown(sbi)))
@@ -808,6 +809,27 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
+ /*
+ * Writing via mmap has no logic to handle inline data, so we
+ * need to call ext4_convert_inline_data() to convert the inode
+ * to normal format before doing so, otherwise a BUG_ON will be
+ * triggered in ext4_writepages() due to the
+ * EXT4_STATE_MAY_INLINE_DATA flag. Moreover, we need to grab
+ * i_rwsem during conversion, since clearing and setting the
+ * inline data flag may race with ext4_buffered_write_iter()
+ * to trigger a BUG_ON.
+ */
+ if (ext4_has_feature_inline_data(sb) &&
+ vma->vm_flags & VM_SHARED && vma->vm_flags & VM_MAYWRITE) {
+ int err;
+
+ inode_lock(inode);
+ err = ext4_convert_inline_data(inode);
+ inode_unlock(inode);
+ if (err)
+ return err;
+ }
+
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce5f21b6c2b3..31844c4ec9fe 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6043,10 +6043,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
filemap_invalidate_lock_shared(mapping);
- err = ext4_convert_inline_data(inode);
- if (err)
- goto out_ret;
-
/*
* On data journalling we skip straight to the transaction handle:
* there's no delalloc; page truncated will be checked later; the
--
2.31.1
Hi Linus
sorry for this unusual procedure of me requesting a patch to be pulled.
For several months I have asked the maintainers (David: asymmetric keys,
Jarkko: key subsystem) to pick up my patch, but without any luck.
I signed the tag, but probably it would not matter, since my key is not
among your trusted keys.
The following changes since commit 921bdc72a0d68977092d6a64855a1b8967acc1d9:
Merge tag 'mmc-v6.4-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc (2023-06-02 08:35:13 -0400)
are available in the Git repository at:
https://github.com/robertosassu/linux.git tags/asym-keys-fix-for-linus-v6.4-rc5
for you to fetch changes up to c3d03e8e35e005e1a614e51bb59053eeb5857f76:
KEYS: asymmetric: Copy sig and digest in public_key_verify_signature() (2023-06-02 15:36:23 +0200)
----------------------------------------------------------------
Asymmetric keys fix for v6.4-rc5
Here is a small fix to make an unconditional copy of the buffer passed
to crypto operations, to take into account the case of the stack not in
the linear mapping area.
It has been tested and verified to fix the bug.
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
----------------------------------------------------------------
Roberto Sassu (1):
KEYS: asymmetric: Copy sig and digest in public_key_verify_signature()
crypto/asymmetric_keys/public_key.c | 38 +++++++++++++++++++++-----------------
1 file changed, 21 insertions(+), 17 deletions(-)
We are a premier financial institution offering flexible loans ranging from 10,000 to 10 million euros at an interest rate of 2% to interested individuals or companies. E-Mail:aflimited@secretary.net
NULL the dangling pipe reference while clearing watch_queue.
If not done, a reference to a freed pipe remains in the watch_queue,
as this function is called before freeing a pipe in free_pipe_info()
(see line 834 of fs/pipe.c).
The sole use of wqueue->defunct is for checking if the watch queue has
been cleared, but wqueue->pipe is also NULLed while clearing.
Thus, wqueue->defunct is superfluous, as wqueue->pipe can be checked
for NULL. Hence, the former can be removed.
Tested with keyutils testsuite.
Cc: stable(a)vger.kernel.org # 6.1
Signed-off-by: Siddh Raman Pant <code(a)siddh.me>
---
Changes in v5:
- Rebased to latest mainline.
- Added Cc to stable.
- Specify tests passing. Note that all tests in the keyutils testsuite
passed, except tests/features/builtin_trusted, which we should not
worry about as it requires some kernel preparation according to
David Howells in v4 discussion.
Changes in v4 (11 Jan 2023):
- Drop preceding kerneldoc-changes patch and change appropriately.
Changes in v3 (8 Jan 2023):
- Minor rephrase of comment before NULLing in watch_queue_clear().
Changes in v2 (6 Aug 2022):
- Merged the NULLing and removing defunct patches.
- Removed READ_ONCE barrier in lock_wqueue().
- Better commit messages.
include/linux/watch_queue.h | 3 +--
kernel/watch_queue.c | 12 ++++++------
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index fc6bba20273b..45cd42f55d49 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue {
struct rcu_head rcu;
struct watch_filter __rcu *filter;
- struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
+ struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */
@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock;
unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */
- bool defunct; /* T when queues closed */
};
/*
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index e91cb4c2833f..d0b6b390ee42 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc.");
static inline bool lock_wqueue(struct watch_queue *wqueue)
{
spin_lock_bh(&wqueue->lock);
- if (unlikely(wqueue->defunct)) {
+ if (unlikely(!wqueue->pipe)) {
spin_unlock_bh(&wqueue->lock);
return false;
}
@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
unsigned int head, tail, mask, note, offset, len;
bool done = false;
- if (!pipe)
- return false;
-
spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1;
@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
- /* Prevent new notifications from being stored. */
- wqueue->defunct = true;
+ /*
+ * This pipe can be freed by callers like free_pipe_info().
+ * Removing this reference also prevents new notifications.
+ */
+ wqueue->pipe = NULL;
while (!hlist_empty(&wqueue->watches)) {
watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
--
2.39.2
Hello, the following patch cherry-picked cleanly during my testing.
Subject of Patch: xfs: verify buffer contents when we skip log replay
Commit Hash: 22ed903eee23a5b174e240f1cdfa9acf393a5210
Reason why it should be applied: This fixes CVE-2023-2124.
Kernel Versions to be applied to: 6.1, 5.15, 5.10
Thank You,
Michael Kochera