From: Lukas Czerner lczerner@redhat.com
[ Upstream commit 24dc9864914eb5813173cfa53313fcd02e4aea7d ]
Callers of __jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() assume that the b_transaction is set. In fact if it's not, we can end up with journal_head refcounting errors leading to crash much later that might be very hard to track down. Add asserts to make sure that is the case.
We also make sure that b_next_transaction is NULL in __jbd2_journal_unfile_buffer() since the callers expect that as well and we should not get into that stage in this state anyway, leading to problems later on if we do.
Tested with fstests.
Signed-off-by: Lukas Czerner lczerner@redhat.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20200617092549.6712-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/jbd2/transaction.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 43693b6797105..5a0de78a5d71a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1915,6 +1915,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) */ static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { + J_ASSERT_JH(jh, jh->b_transaction != NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; jbd2_journal_put_journal_head(jh); @@ -2462,6 +2465,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); + + /* + * b_transaction must be set, otherwise the new b_transaction won't + * be holding jh reference + */ + J_ASSERT_JH(jh, jh->b_transaction != NULL); + /* * We set b_transaction here because b_next_transaction will inherit * our jh reference and thus __jbd2_journal_file_buffer() must not
From: Jan Kara jack@suse.cz
[ Upstream commit 11215630aada28307ba555a43138db6ac54fa825 ]
A customer has reported a BUG_ON in ext4_clear_journal_err() hitting during an LTP testing. Either this has been caused by a test setup issue where the filesystem was being overwritten while LTP was mounting it or the journal replay has overwritten the superblock with invalid data. In either case it is preferable we don't take the machine down with a BUG_ON. So handle the situation of unexpectedly missing has_journal feature more gracefully. We issue warning and fail the mount in the cases where the race window is narrow and the failed check is most likely a programming error. In cases where fs corruption is more likely, we do full ext4_error() handling before failing mount / remount.
Reviewed-by: Lukas Czerner lczerner@redhat.com Signed-off-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20200710140759.18031-1-jack@suse.cz Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 68 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 21 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1428dab2afff8..47e406338a322 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,10 +65,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); static int ext4_commit_super(struct super_block *sb, int sync); -static void ext4_mark_recovery_complete(struct super_block *sb, +static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); -static void ext4_clear_journal_err(struct super_block *sb, - struct ext4_super_block *es); +static int ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -4545,7 +4545,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) { ext4_msg(sb, KERN_INFO, "recovery complete"); - ext4_mark_recovery_complete(sb, es); + err = ext4_mark_recovery_complete(sb, es); + if (err) + goto failed_mount8; } if (EXT4_SB(sb)->s_journal) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) @@ -4588,10 +4590,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount;
-#ifdef CONFIG_QUOTA failed_mount8: ext4_unregister_sysfs(sb); -#endif failed_mount7: ext4_unregister_li_request(sb); failed_mount6: @@ -4727,7 +4727,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, struct inode *journal_inode; journal_t *journal;
- BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return NULL;
journal_inode = ext4_get_journal_inode(sb, journal_inum); if (!journal_inode) @@ -4757,7 +4758,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, struct ext4_super_block *es; struct block_device *bdev;
- BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return NULL;
bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) @@ -4849,7 +4851,8 @@ static int ext4_load_journal(struct super_block *sb, int err = 0; int really_read_only;
- BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return -EFSCORRUPTED;
if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -4919,7 +4922,12 @@ static int ext4_load_journal(struct super_block *sb, }
EXT4_SB(sb)->s_journal = journal; - ext4_clear_journal_err(sb, es); + err = ext4_clear_journal_err(sb, es); + if (err) { + EXT4_SB(sb)->s_journal = NULL; + jbd2_journal_destroy(journal); + return err; + }
if (!really_read_only && journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -5015,26 +5023,32 @@ static int ext4_commit_super(struct super_block *sb, int sync) * remounting) the filesystem readonly, then we will end up with a * consistent fs on disk. Record that fact. */ -static void ext4_mark_recovery_complete(struct super_block *sb, - struct ext4_super_block *es) +static int ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es) { + int err; journal_t *journal = EXT4_SB(sb)->s_journal;
if (!ext4_has_feature_journal(sb)) { - BUG_ON(journal != NULL); - return; + if (journal != NULL) { + ext4_error(sb, "Journal got removed while the fs was " + "mounted!"); + return -EFSCORRUPTED; + } + return 0; } jbd2_journal_lock_updates(journal); - if (jbd2_journal_flush(journal) < 0) + err = jbd2_journal_flush(journal); + if (err < 0) goto out;
if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } - out: jbd2_journal_unlock_updates(journal); + return err; }
/* @@ -5042,14 +5056,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb, * has recorded an error from a previous lifetime, move that error to the * main filesystem now. */ -static void ext4_clear_journal_err(struct super_block *sb, +static int ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es) { journal_t *journal; int j_errno; const char *errstr;
- BUG_ON(!ext4_has_feature_journal(sb)); + if (!ext4_has_feature_journal(sb)) { + ext4_error(sb, "Journal got removed while the fs was mounted!"); + return -EFSCORRUPTED; + }
journal = EXT4_SB(sb)->s_journal;
@@ -5074,6 +5091,7 @@ static void ext4_clear_journal_err(struct super_block *sb, jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); } + return 0; }
/* @@ -5344,8 +5362,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal) + if (sbi->s_journal) { + /* + * We let remount-ro finish even if marking fs + * as clean failed... + */ ext4_mark_recovery_complete(sb, es); + } if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); } else { @@ -5393,8 +5416,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * been changed by e2fsck since we originally mounted * the partition.) */ - if (sbi->s_journal) - ext4_clear_journal_err(sb, es); + if (sbi->s_journal) { + err = ext4_clear_journal_err(sb, es); + if (err) + goto restore_opts; + } sbi->s_mount_state = le16_to_cpu(es->s_state);
err = ext4_setup_super(sb, es, 0);
From: Lukas Czerner lczerner@redhat.com
[ Upstream commit 273108fa5015eeffc4bacfa5ce272af3434b96e4 ]
Ext4 uses blkdev_get_by_dev() to get the block_device for journal device which does check to see if the read-only block device was opened read-only.
As a result ext4 will hapily proceed mounting the file system with external journal on read-only device. This is bad as we would not be able to use the journal leading to errors later on.
Instead of simply failing to mount file system in this case, treat it in a similar way we treat internal journal on read-only device. Allow to mount with -o noload in read-only mode.
This can be reproduced easily like this:
mke2fs -F -O journal_dev $JOURNAL_DEV 100M mkfs.$FSTYPE -F -J device=$JOURNAL_DEV $FS_DEV blockdev --setro $JOURNAL_DEV mount $FS_DEV $MNT touch $MNT/file umount $MNT
leading to error like this
[ 1307.318713] ------------[ cut here ]------------ [ 1307.323362] generic_make_request: Trying to write to read-only block-device dm-2 (partno 0) [ 1307.331741] WARNING: CPU: 36 PID: 3224 at block/blk-core.c:855 generic_make_request_checks+0x2c3/0x580 [ 1307.341041] Modules linked in: ext4 mbcache jbd2 rfkill intel_rapl_msr intel_rapl_common isst_if_commd [ 1307.419445] CPU: 36 PID: 3224 Comm: jbd2/dm-2 Tainted: G W I 5.8.0-rc5 #2 [ 1307.427359] Hardware name: Dell Inc. PowerEdge R740/01KPX8, BIOS 2.3.10 08/15/2019 [ 1307.434932] RIP: 0010:generic_make_request_checks+0x2c3/0x580 [ 1307.440676] Code: 94 03 00 00 48 89 df 48 8d 74 24 08 c6 05 cf 2b 18 01 01 e8 7f a4 ff ff 48 c7 c7 50e [ 1307.459420] RSP: 0018:ffffc0d70eb5fb48 EFLAGS: 00010286 [ 1307.464646] RAX: 0000000000000000 RBX: ffff9b33b2978300 RCX: 0000000000000000 [ 1307.471780] RDX: ffff9b33e12a81e0 RSI: ffff9b33e1298000 RDI: ffff9b33e1298000 [ 1307.478913] RBP: ffff9b7b9679e0c0 R08: 0000000000000837 R09: 0000000000000024 [ 1307.486044] R10: 0000000000000000 R11: ffffc0d70eb5f9f0 R12: 0000000000000400 [ 1307.493177] R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000000 [ 1307.500308] FS: 0000000000000000(0000) GS:ffff9b33e1280000(0000) knlGS:0000000000000000 [ 1307.508396] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1307.514142] CR2: 000055eaf4109000 CR3: 0000003dee40a006 CR4: 00000000007606e0 [ 1307.521273] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1307.528407] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1307.535538] PKRU: 55555554 [ 1307.538250] Call Trace: [ 1307.540708] generic_make_request+0x30/0x340 [ 1307.544985] submit_bio+0x43/0x190 [ 1307.548393] ? bio_add_page+0x62/0x90 [ 1307.552068] submit_bh_wbc+0x16a/0x190 [ 1307.555833] jbd2_write_superblock+0xec/0x200 [jbd2] [ 1307.560803] jbd2_journal_update_sb_log_tail+0x65/0xc0 [jbd2] [ 1307.566557] jbd2_journal_commit_transaction+0x2ae/0x1860 [jbd2] [ 1307.572566] ? check_preempt_curr+0x7a/0x90 [ 1307.576756] ? update_curr+0xe1/0x1d0 [ 1307.580421] ? account_entity_dequeue+0x7b/0xb0 [ 1307.584955] ? newidle_balance+0x231/0x3d0 [ 1307.589056] ? __switch_to_asm+0x42/0x70 [ 1307.592986] ? __switch_to_asm+0x36/0x70 [ 1307.596918] ? lock_timer_base+0x67/0x80 [ 1307.600851] kjournald2+0xbd/0x270 [jbd2] [ 1307.604873] ? finish_wait+0x80/0x80 [ 1307.608460] ? commit_timeout+0x10/0x10 [jbd2] [ 1307.612915] kthread+0x114/0x130 [ 1307.616152] ? kthread_park+0x80/0x80 [ 1307.619816] ret_from_fork+0x22/0x30 [ 1307.623400] ---[ end trace 27490236265b1630 ]---
Signed-off-by: Lukas Czerner lczerner@redhat.com Reviewed-by: Andreas Dilger adilger@dilger.ca Link: https://lore.kernel.org/r/20200717090605.2612-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 51 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 47e406338a322..23ef8fbdb582f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4850,6 +4850,7 @@ static int ext4_load_journal(struct super_block *sb, dev_t journal_dev; int err = 0; int really_read_only; + int journal_dev_ro;
if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) return -EFSCORRUPTED; @@ -4862,7 +4863,31 @@ static int ext4_load_journal(struct super_block *sb, } else journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
- really_read_only = bdev_read_only(sb->s_bdev); + if (journal_inum && journal_dev) { + ext4_msg(sb, KERN_ERR, + "filesystem has both journal inode and journal device!"); + return -EINVAL; + } + + if (journal_inum) { + journal = ext4_get_journal(sb, journal_inum); + if (!journal) + return -EINVAL; + } else { + journal = ext4_get_dev_journal(sb, journal_dev); + if (!journal) + return -EINVAL; + } + + journal_dev_ro = bdev_read_only(journal->j_dev); + really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro; + + if (journal_dev_ro && !sb_rdonly(sb)) { + ext4_msg(sb, KERN_ERR, + "journal device read-only, try mounting with '-o ro'"); + err = -EROFS; + goto err_out; + }
/* * Are we loading a blank journal or performing recovery after a @@ -4877,27 +4902,14 @@ static int ext4_load_journal(struct super_block *sb, ext4_msg(sb, KERN_ERR, "write access " "unavailable, cannot proceed " "(try mounting with noload)"); - return -EROFS; + err = -EROFS; + goto err_out; } ext4_msg(sb, KERN_INFO, "write access will " "be enabled during recovery"); } }
- if (journal_inum && journal_dev) { - ext4_msg(sb, KERN_ERR, "filesystem has both journal " - "and inode journals!"); - return -EINVAL; - } - - if (journal_inum) { - if (!(journal = ext4_get_journal(sb, journal_inum))) - return -EINVAL; - } else { - if (!(journal = ext4_get_dev_journal(sb, journal_dev))) - return -EINVAL; - } - if (!(journal->j_flags & JBD2_BARRIER)) ext4_msg(sb, KERN_INFO, "barriers disabled");
@@ -4917,8 +4929,7 @@ static int ext4_load_journal(struct super_block *sb,
if (err) { ext4_msg(sb, KERN_ERR, "error loading journal"); - jbd2_journal_destroy(journal); - return err; + goto err_out; }
EXT4_SB(sb)->s_journal = journal; @@ -4938,6 +4949,10 @@ static int ext4_load_journal(struct super_block *sb, }
return 0; + +err_out: + jbd2_journal_destroy(journal); + return err; }
static int ext4_commit_super(struct super_block *sb, int sync)
From: "zhangyi (F)" yi.zhang@huawei.com
[ Upstream commit c044f3d8360d2ecf831ba2cc9f08cf9fb2c699fb ]
If we free a metadata buffer which has been failed to async write out in the background, the jbd2 checkpoint procedure will not detect this failure in jbd2_log_do_checkpoint(), so it may lead to filesystem inconsistency after cleanup journal tail. This patch abort the journal if free a buffer has write_io_error flag to prevent potential further inconsistency.
Signed-off-by: zhangyi (F) yi.zhang@huawei.com Link: https://lore.kernel.org/r/20200620025427.1756360-5-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/jbd2/transaction.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5a0de78a5d71a..8c305593fb51f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2009,6 +2009,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, { struct buffer_head *head; struct buffer_head *bh; + bool has_write_io_error = false; int ret = 0;
J_ASSERT(PageLocked(page)); @@ -2033,11 +2034,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, jbd_unlock_bh_state(bh); if (buffer_jbd(bh)) goto busy; + + /* + * If we free a metadata buffer which has been failed to + * write out, the jbd2 checkpoint procedure will not detect + * this failure and may lead to filesystem inconsistency + * after cleanup journal tail. + */ + if (buffer_write_io_error(bh)) { + pr_err("JBD2: Error while async write back metadata bh %llu.", + (unsigned long long)bh->b_blocknr); + has_write_io_error = true; + } } while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy: + if (has_write_io_error) + jbd2_journal_abort(journal, -EIO); + return ret; }
From: Lukas Czerner lczerner@redhat.com
[ Upstream commit f25391ebb475d3ffb3aa61bb90e3594c841749ef ]
Currently there is a problem with mount options that can be both set by vfs using mount flags or by a string parsing in ext4.
i_version/iversion options gets lost after remount, for example
$ mount -o i_version /dev/pmem0 /mnt $ grep pmem0 /proc/self/mountinfo | grep i_version 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,seclabel,i_version $ mount -o remount,ro /mnt $ grep pmem0 /proc/self/mountinfo | grep i_version
nolazytime gets ignored by ext4 on remount, for example
$ mount -o lazytime /dev/pmem0 /mnt $ grep pmem0 /proc/self/mountinfo | grep lazytime 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,lazytime,seclabel $ mount -o remount,nolazytime /mnt $ grep pmem0 /proc/self/mountinfo | grep lazytime 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,lazytime,seclabel
Fix it by applying the SB_LAZYTIME and SB_I_VERSION flags from *flags to s_flags before we parse the option and use the resulting state of the same flags in *flags at the end of successful remount.
Signed-off-by: Lukas Czerner lczerner@redhat.com Reviewed-by: Ritesh Harjani riteshh@linux.ibm.com Link: https://lore.kernel.org/r/20200723150526.19931-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 23ef8fbdb582f..03ebb0b385467 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5249,7 +5249,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) { struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long old_sb_flags; + unsigned long old_sb_flags, vfs_flags; struct ext4_mount_options old_opts; int enable_quota = 0; ext4_group_t g; @@ -5292,6 +5292,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal && sbi->s_journal->j_task->io_context) journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
+ /* + * Some options can be enabled by ext4 and/or by VFS mount flag + * either way we need to make sure it matches in both *flags and + * s_flags. Copy those selected flags from *flags to s_flags + */ + vfs_flags = SB_LAZYTIME | SB_I_VERSION; + sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); + if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { err = -EINVAL; goto restore_opts; @@ -5345,9 +5353,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); }
- if (*flags & SB_LAZYTIME) - sb->s_flags |= SB_LAZYTIME; - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; @@ -5487,7 +5492,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #endif
- *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + /* + * Some options can be enabled by ext4 and/or by VFS mount flag + * either way we need to make sure it matches in both *flags and + * s_flags. Copy those selected flags from s_flags to *flags + */ + *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0;
From: Jan Kara jack@suse.cz
[ Upstream commit d176b1f62f242ab259ff665a26fbac69db1aecba ]
ext4_setup_system_zone() can fail. Handle the failure in ext4_remount().
Reviewed-by: Lukas Czerner lczerner@redhat.com Signed-off-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20200728130437.7804-2-jack@suse.cz Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 03ebb0b385467..daabd7a2cee81 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5470,7 +5470,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_register_li_request(sb, first_not_zeroed); }
- ext4_setup_system_zone(sb); + err = ext4_setup_system_zone(sb); + if (err) + goto restore_opts; + if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { err = ext4_commit_super(sb, 1); if (err)
From: Jan Kara jack@suse.cz
[ Upstream commit 0f5bde1db174f6c471f0bd27198575719dabe3e5 ]
When remounting filesystem fails late during remount handling and block_validity mount option is also changed during the remount, we fail to restore system zone information to a state matching the mount option. This is mostly harmless, just the block validity checking will not match the situation described by the mount option. Make sure these two are always consistent.
Reported-by: Lukas Czerner lczerner@redhat.com Reviewed-by: Lukas Czerner lczerner@redhat.com Signed-off-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20200728130437.7804-7-jack@suse.cz Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/block_validity.c | 8 -------- fs/ext4/super.c | 29 +++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index d203cc935ff83..9d5a6bc98a71a 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -260,14 +260,6 @@ int ext4_setup_system_zone(struct super_block *sb) int flex_size = ext4_flex_bg_size(sbi); int ret;
- if (!test_opt(sb, BLOCK_VALIDITY)) { - if (sbi->system_blks) - ext4_release_system_zone(sb); - return 0; - } - if (sbi->system_blks) - return 0; - system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); if (!system_blks) return -ENOMEM; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index daabd7a2cee81..9ac34b6ae0731 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4473,11 +4473,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_set_resv_clusters(sb);
- err = ext4_setup_system_zone(sb); - if (err) { - ext4_msg(sb, KERN_ERR, "failed to initialize system " - "zone (%d)", err); - goto failed_mount4a; + if (test_opt(sb, BLOCK_VALIDITY)) { + err = ext4_setup_system_zone(sb); + if (err) { + ext4_msg(sb, KERN_ERR, "failed to initialize system " + "zone (%d)", err); + goto failed_mount4a; + } }
ext4_ext_init(sb); @@ -5470,9 +5472,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_register_li_request(sb, first_not_zeroed); }
- err = ext4_setup_system_zone(sb); - if (err) - goto restore_opts; + /* + * Handle creation of system zone data early because it can fail. + * Releasing of existing data is done when we are sure remount will + * succeed. + */ + if (test_opt(sb, BLOCK_VALIDITY) && !sbi->system_blks) { + err = ext4_setup_system_zone(sb); + if (err) + goto restore_opts; + }
if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { err = ext4_commit_super(sb, 1); @@ -5494,6 +5503,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } } #endif + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks) + ext4_release_system_zone(sb);
/* * Some options can be enabled by ext4 and/or by VFS mount flag @@ -5515,6 +5526,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sbi->s_commit_interval = old_opts.s_commit_interval; sbi->s_min_batch_time = old_opts.s_min_batch_time; sbi->s_max_batch_time = old_opts.s_max_batch_time; + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks) + ext4_release_system_zone(sb); #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) {
From: Xianting Tian xianting_tian@126.com
[ Upstream commit 377254b2cd2252c7c3151b113cbdf93a7736c2e9 ]
If a device is hot-removed --- for example, when a physical device is unplugged from pcie slot or a nbd device's network is shutdown --- this can result in a BUG_ON() crash in submit_bh_wbc(). This is because the when the block device dies, the buffer heads will have their Buffer_Mapped flag get cleared, leading to the crash in submit_bh_wbc.
We had attempted to work around this problem in commit a17712c8 ("ext4: check superblock mapped prior to committing"). Unfortunately, it's still possible to hit the BUG_ON(!buffer_mapped(bh)) if the device dies between when the work-around check in ext4_commit_super() and when submit_bh_wbh() is finally called:
Code path: ext4_commit_super judge if 'buffer_mapped(sbh)' is false, return <== commit a17712c8 lock_buffer(sbh) ... unlock_buffer(sbh) __sync_dirty_buffer(sbh,... lock_buffer(sbh) judge if 'buffer_mapped(sbh))' is false, return <== added by this patch submit_bh(...,sbh) submit_bh_wbc(...,sbh,...)
[100722.966497] kernel BUG at fs/buffer.c:3095! <== BUG_ON(!buffer_mapped(bh))' in submit_bh_wbc() [100722.966503] invalid opcode: 0000 [#1] SMP [100722.966566] task: ffff8817e15a9e40 task.stack: ffffc90024744000 [100722.966574] RIP: 0010:submit_bh_wbc+0x180/0x190 [100722.966575] RSP: 0018:ffffc90024747a90 EFLAGS: 00010246 [100722.966576] RAX: 0000000000620005 RBX: ffff8818a80603a8 RCX: 0000000000000000 [100722.966576] RDX: ffff8818a80603a8 RSI: 0000000000020800 RDI: 0000000000000001 [100722.966577] RBP: ffffc90024747ac0 R08: 0000000000000000 R09: ffff88207f94170d [100722.966578] R10: 00000000000437c8 R11: 0000000000000001 R12: 0000000000020800 [100722.966578] R13: 0000000000000001 R14: 000000000bf9a438 R15: ffff88195f333000 [100722.966580] FS: 00007fa2eee27700(0000) GS:ffff88203d840000(0000) knlGS:0000000000000000 [100722.966580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [100722.966581] CR2: 0000000000f0b008 CR3: 000000201a622003 CR4: 00000000007606e0 [100722.966582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [100722.966583] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [100722.966583] PKRU: 55555554 [100722.966583] Call Trace: [100722.966588] __sync_dirty_buffer+0x6e/0xd0 [100722.966614] ext4_commit_super+0x1d8/0x290 [ext4] [100722.966626] __ext4_std_error+0x78/0x100 [ext4] [100722.966635] ? __ext4_journal_get_write_access+0xca/0x120 [ext4] [100722.966646] ext4_reserve_inode_write+0x58/0xb0 [ext4] [100722.966655] ? ext4_dirty_inode+0x48/0x70 [ext4] [100722.966663] ext4_mark_inode_dirty+0x53/0x1e0 [ext4] [100722.966671] ? __ext4_journal_start_sb+0x6d/0xf0 [ext4] [100722.966679] ext4_dirty_inode+0x48/0x70 [ext4] [100722.966682] __mark_inode_dirty+0x17f/0x350 [100722.966686] generic_update_time+0x87/0xd0 [100722.966687] touch_atime+0xa9/0xd0 [100722.966690] generic_file_read_iter+0xa09/0xcd0 [100722.966694] ? page_cache_tree_insert+0xb0/0xb0 [100722.966704] ext4_file_read_iter+0x4a/0x100 [ext4] [100722.966707] ? __inode_security_revalidate+0x4f/0x60 [100722.966709] __vfs_read+0xec/0x160 [100722.966711] vfs_read+0x8c/0x130 [100722.966712] SyS_pread64+0x87/0xb0 [100722.966716] do_syscall_64+0x67/0x1b0 [100722.966719] entry_SYSCALL64_slow_path+0x25/0x25
To address this, add the check of 'buffer_mapped(bh)' to __sync_dirty_buffer(). This also has the benefit of fixing this for other file systems.
With this addition, we can drop the workaround in ext4_commit_supper().
[ Commit description rewritten by tytso. ]
Signed-off-by: Xianting Tian xianting_tian@126.com Link: https://lore.kernel.org/r/1596211825-8750-1-git-send-email-xianting_tian@126... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 9 +++++++++ fs/ext4/super.c | 7 ------- 2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index c49fdab5cb36e..362a868764599 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3193,6 +3193,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) WARN_ON(atomic_read(&bh->b_count) < 1); lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { + /* + * The bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(bh)) { + unlock_buffer(bh); + return -EIO; + } + get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(REQ_OP_WRITE, op_flags, bh); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ac34b6ae0731..0c15ff19acbd4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4966,13 +4966,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error;
- /* - * The superblock bh should be mapped, but it might not be if the - * device was hot-removed. Not much we can do but fail the I/O. - */ - if (!buffer_mapped(sbh)) - return error; - /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock
From: Amelie Delaunay amelie.delaunay@st.com
[ Upstream commit 9cc61973bf9385b19ff5dda4a2a7e265fcba85e4 ]
Fix spi->clk_rate when it is odd to the nearest lowest even value because minimum SPI divider is 2.
Signed-off-by: Amelie Delaunay amelie.delaunay@st.com Signed-off-by: Alain Volmat alain.volmat@st.com Link: https://lore.kernel.org/r/1597043558-29668-4-git-send-email-alain.volmat@st.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/spi/spi-stm32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index ad1e55d3d5d59..391a20b3d2fda 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -254,7 +254,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz) { u32 div, mbrdiv;
- div = DIV_ROUND_UP(spi->clk_rate, speed_hz); + /* Ensure spi->clk_rate is even */ + div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz);
/* * SPI framework set xfer->speed_hz to master->max_speed_hz if
From: Vineeth Vijayan vneethv@linux.ibm.com
[ Upstream commit 0b8eb2ee9da1e8c9b8082f404f3948aa82a057b2 ]
The scanning through subchannels during the time of an event could take significant amount of time in case of platforms with lots of known subchannels. This might result in higher scheduling latencies for other tasks especially on systems with a single CPU. Add cond_resched() call, as the loop in slow_eval_known_fn() can be executed for a longer duration.
Reviewed-by: Peter Oberparleiter oberpar@linux.ibm.com Signed-off-by: Vineeth Vijayan vneethv@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/cio/css.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index df09ed53ab459..825a8f2703b4f 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -615,6 +615,11 @@ static int slow_eval_known_fn(struct subchannel *sch, void *data) rc = css_evaluate_known_subchannel(sch, 1); if (rc == -EAGAIN) css_schedule_eval(sch->schid); + /* + * The loop might take long time for platforms with lots of + * known devices. Allow scheduling here. + */ + cond_resched(); } return 0; }
From: Sylwester Nawrocki s.nawrocki@samsung.com
[ Upstream commit f082bb59b72039a2326ec1a44496899fb8aa6d0e ]
The driver supports WM1811, WM8994, WM8958 devices but according to documentation and the regmap definitions the WM8958_DSP2_* registers are only available on WM8958. In current code these registers are being accessed as if they were available on all the three chips.
When starting playback on WM1811 CODEC multiple errors like: "wm8994-codec wm8994-codec: ASoC: error at soc_component_read_no_lock on wm8994-codec: -5" can be seen, which is caused by attempts to read an unavailable WM8958_DSP2_PROGRAM register. The issue has been uncovered by recent commit "e2329ee ASoC: soc-component: add soc_component_err()".
This patch adds a check in wm8958_aif_ev() callback so the DSP2 handling is only done for WM8958.
Signed-off-by: Sylwester Nawrocki s.nawrocki@samsung.com Acked-by: Charles Keepax ckeepax@opensource.cirrus.com Link: https://lore.kernel.org/r/20200731173834.23832-1-s.nawrocki@samsung.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/wm8958-dsp2.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index 108e8bf42a346..f0a409504a13b 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -419,8 +419,12 @@ int wm8958_aif_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct wm8994 *control = dev_get_drvdata(component->dev->parent); int i;
+ if (control->type != WM8958) + return 0; + switch (event) { case SND_SOC_DAPM_POST_PMU: case SND_SOC_DAPM_PRE_PMU:
From: Mike Christie michael.christie@oracle.com
[ Upstream commit fa39ab5184d64563cd36f2fb5f0d3fbad83a432c ]
ixgbe_fcoe_ddp_setup() can be called from the main I/O path and is called with a spin_lock held, so we have to use GFP_ATOMIC allocation instead of GFP_KERNEL.
Link: https://lore.kernel.org/r/1596831813-9839-1-git-send-email-michael.christie@... cc: Hannes Reinecke hare@suse.de Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index ccd852ad62a4b..d50c5b55da180 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -192,7 +192,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, }
/* alloc the udl from per cpu ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp); + ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap;
From: Stanley Chu stanley.chu@mediatek.com
[ Upstream commit 93b6c5db06028a3b55122bbb74d0715dd8ca4ae0 ]
In ufshcd_suspend(), after clk-gating is suspended and link is set as Hibern8 state, ufshcd_hold() is still possibly invoked before ufshcd_suspend() returns. For example, MediaTek's suspend vops may issue UIC commands which would call ufshcd_hold() during the command issuing flow.
Now if UFSHCD_CAP_HIBERN8_WITH_CLK_GATING capability is enabled, then ufshcd_hold() may enter infinite loops because there is no clk-ungating work scheduled or pending. In this case, ufshcd_hold() shall just bypass, and keep the link as Hibern8 state.
Link: https://lore.kernel.org/r/20200809050734.18740-1-stanley.chu@mediatek.com Reviewed-by: Avri Altman avri.altman@wdc.com Co-developed-by: Andy Teng andy.teng@mediatek.com Signed-off-by: Andy Teng andy.teng@mediatek.com Signed-off-by: Stanley Chu stanley.chu@mediatek.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/ufs/ufshcd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index bd21c9cdf8183..ff6912f9f49f7 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1538,6 +1538,7 @@ static void ufshcd_ungate_work(struct work_struct *work) int ufshcd_hold(struct ufs_hba *hba, bool async) { int rc = 0; + bool flush_result; unsigned long flags;
if (!ufshcd_is_clkgating_allowed(hba)) @@ -1569,7 +1570,9 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) break; } spin_unlock_irqrestore(hba->host->host_lock, flags); - flush_work(&hba->clk_gating.ungate_work); + flush_result = flush_work(&hba->clk_gating.ungate_work); + if (hba->clk_gating.is_suspended && !flush_result) + goto out; spin_lock_irqsave(hba->host->host_lock, flags); goto start; }
From: Adrian Hunter adrian.hunter@intel.com
[ Upstream commit 127d5f7c4b653b8be5eb3b2c7bbe13728f9003ff ]
For shared interrupts, the interrupt status might be zero, so check that first.
Link: https://lore.kernel.org/r/20200811133936.19171-2-adrian.hunter@intel.com Reviewed-by: Avri Altman avri.altman@wdc.com Signed-off-by: Adrian Hunter adrian.hunter@intel.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/ufs/ufshcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index ff6912f9f49f7..72879280df776 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5603,7 +5603,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status) */ static irqreturn_t ufshcd_intr(int irq, void *__hba) { - u32 intr_status, enabled_intr_status; + u32 intr_status, enabled_intr_status = 0; irqreturn_t retval = IRQ_NONE; struct ufs_hba *hba = __hba; int retries = hba->nutrs; @@ -5617,7 +5617,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) * read, make sure we handle them by checking the interrupt status * again in a loop until we process all of the reqs before returning. */ - do { + while (intr_status && retries--) { enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); if (intr_status) @@ -5628,7 +5628,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) }
intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); - } while (intr_status && --retries); + }
spin_unlock(hba->host->host_lock); return retval;
From: Stanley Chu stanley.chu@mediatek.com
[ Upstream commit b10178ee7fa88b68a9e8adc06534d2605cb0ec23 ]
If somehow no interrupt notification is raised for a completed request and its doorbell bit is cleared by host, UFS driver needs to cleanup its outstanding bit in ufshcd_abort(). Otherwise, system may behave abnormally in the following scenario:
After ufshcd_abort() returns, this request will be requeued by SCSI layer with its outstanding bit set. Any future completed request will trigger ufshcd_transfer_req_compl() to handle all "completed outstanding bits". At this time the "abnormal outstanding bit" will be detected and the "requeued request" will be chosen to execute request post-processing flow. This is wrong because this request is still "alive".
Link: https://lore.kernel.org/r/20200811141859.27399-2-huobean@gmail.com Reviewed-by: Can Guo cang@codeaurora.org Acked-by: Avri Altman avri.altman@wdc.com Signed-off-by: Stanley Chu stanley.chu@mediatek.com Signed-off-by: Bean Huo beanhuo@micron.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 72879280df776..546bf4f960f91 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5928,7 +5928,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) /* command completed already */ dev_err(hba->dev, "%s: cmd at tag %d successfully cleared from DB.\n", __func__, tag); - goto out; + goto cleanup; } else { dev_err(hba->dev, "%s: no response from device. tag = %d, err %d\n", @@ -5962,6 +5962,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) goto out; }
+cleanup: scsi_dma_unmap(cmd);
spin_lock_irqsave(host->host_lock, flags);
From: Saurav Kashyap skashyap@marvell.com
[ Upstream commit dffa11453313a115157b19021cc2e27ea98e624c ]
OS boot during Boot from SAN was stuck at dracut emergency shell after enabling NVMe driver parameter. For non-MQ support the driver was enabling MQ. Add a check to confirm if FW supports MQ.
Link: https://lore.kernel.org/r/20200806111014.28434-9-njavali@marvell.com Reviewed-by: Himanshu Madhani himanshu.madhani@oracle.com Signed-off-by: Saurav Kashyap skashyap@marvell.com Signed-off-by: Nilesh Javali njavali@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qla2xxx/qla_os.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index b56cf790587e5..e17ca7df8d0e4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1997,6 +1997,11 @@ qla2x00_iospace_config(struct qla_hw_data *ha) /* Determine queue resources */ ha->max_req_queues = ha->max_rsp_queues = 1; ha->msix_count = QLA_BASE_VECTORS; + + /* Check if FW supports MQ or not */ + if (!(ha->fw_attributes & BIT_6)) + goto mqiobase_exit; + if (!ql2xmqsupport || !ql2xnvmeenable || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))) goto mqiobase_exit;
From: Quinn Tran qutran@marvell.com
[ Upstream commit 83949613fac61e8e37eadf8275bf072342302f4e ]
NVMEAsync command is being submitted to QLA while the same NVMe controller is in the middle of reset. The reset path has deleted the association and freed aen_op->fcp_req.private. Add a check for this private pointer before issuing the command.
... 6 [ffffb656ca11fce0] page_fault at ffffffff8c00114e [exception RIP: qla_nvme_post_cmd+394] RIP: ffffffffc0d012ba RSP: ffffb656ca11fd98 RFLAGS: 00010206 RAX: ffff8fb039eda228 RBX: ffff8fb039eda200 RCX: 00000000000da161 RDX: ffffffffc0d4d0f0 RSI: ffffffffc0d26c9b RDI: ffff8fb039eda220 RBP: 0000000000000013 R8: ffff8fb47ff6aa80 R9: 0000000000000002 R10: 0000000000000000 R11: ffffb656ca11fdc8 R12: ffff8fb27d04a3b0 R13: ffff8fc46dd98a58 R14: 0000000000000000 R15: ffff8fc4540f0000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 7 [ffffb656ca11fe08] nvme_fc_start_fcp_op at ffffffffc0241568 [nvme_fc] 8 [ffffb656ca11fe50] nvme_fc_submit_async_event at ffffffffc0241901 [nvme_fc] 9 [ffffb656ca11fe68] nvme_async_event_work at ffffffffc014543d [nvme_core] 10 [ffffb656ca11fe98] process_one_work at ffffffff8b6cd437 11 [ffffb656ca11fed8] worker_thread at ffffffff8b6cdcef 12 [ffffb656ca11ff10] kthread at ffffffff8b6d3402 13 [ffffb656ca11ff50] ret_from_fork at ffffffff8c000255
-- PID: 37824 TASK: ffff8fb033063d80 CPU: 20 COMMAND: "kworker/u97:451" 0 [ffffb656ce1abc28] __schedule at ffffffff8be629e3 1 [ffffb656ce1abcc8] schedule at ffffffff8be62fe8 2 [ffffb656ce1abcd0] schedule_timeout at ffffffff8be671ed 3 [ffffb656ce1abd70] wait_for_completion at ffffffff8be639cf 4 [ffffb656ce1abdd0] flush_work at ffffffff8b6ce2d5 5 [ffffb656ce1abe70] nvme_stop_ctrl at ffffffffc0144900 [nvme_core] 6 [ffffb656ce1abe80] nvme_fc_reset_ctrl_work at ffffffffc0243445 [nvme_fc] 7 [ffffb656ce1abe98] process_one_work at ffffffff8b6cd437 8 [ffffb656ce1abed8] worker_thread at ffffffff8b6cdb50 9 [ffffb656ce1abf10] kthread at ffffffff8b6d3402 10 [ffffb656ce1abf50] ret_from_fork at ffffffff8c000255
Link: https://lore.kernel.org/r/20200806111014.28434-10-njavali@marvell.com Reviewed-by: Himanshu Madhani himanshu.madhani@oracle.com Signed-off-by: Quinn Tran qutran@marvell.com Signed-off-by: Nilesh Javali njavali@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qla2xxx/qla_nvme.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 5590d6e8b5762..3e2f8ce1d9a97 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -477,6 +477,11 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, struct nvme_private *priv = fd->private; struct qla_nvme_rport *qla_rport = rport->private;
+ if (!priv) { + /* nvme association has been torn down */ + return rval; + } + fcport = qla_rport->fcport;
vha = fcport->vha;
From: Saurav Kashyap skashyap@marvell.com
[ Upstream commit de7e6194301ad31c4ce95395eb678e51a1b907e5 ]
FCoE adapter initialization failed for ISP8021 with the following patch applied. In addition, reproduction of the issue the patch originally tried to address has been unsuccessful.
This reverts commit 3cb182b3fa8b7a61f05c671525494697cba39c6a.
Link: https://lore.kernel.org/r/20200806111014.28434-11-njavali@marvell.com Reviewed-by: Himanshu Madhani himanshu.madhani@oracle.com Signed-off-by: Saurav Kashyap skashyap@marvell.com Signed-off-by: Nilesh Javali njavali@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qla2xxx/qla_mbx.c | 8 -------- 1 file changed, 8 deletions(-)
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index ac5d2d34aeeae..07c5d7397d425 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -329,14 +329,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (time_after(jiffies, wait_time)) break;
- /* - * Check if it's UNLOADING, cause we cannot poll in - * this case, or else a NULL pointer dereference - * is triggered. - */ - if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags))) - return QLA_FUNCTION_TIMEOUT; - /* Check for pending interrupts. */ qla2x00_poll(ha->rsp_q_map[0]);
From: Alvin Šipraga alsi@bang-olufsen.dk
[ Upstream commit 8b61fba503904acae24aeb2bd5569b4d6544d48f ]
Remote source MAC addresses can be set on a 'source mode' macvlan interface via the IFLA_MACVLAN_MACADDR_DATA attribute. This commit tightens the validation of these MAC addresses to match the validation already performed when setting or adding a single MAC address via the IFLA_MACVLAN_MACADDR attribute.
iproute2 uses IFLA_MACVLAN_MACADDR_DATA for its 'macvlan macaddr set' command, and IFLA_MACVLAN_MACADDR for its 'macvlan macaddr add' command, which demonstrates the inconsistent behaviour that this commit addresses:
# ip link add link eth0 name macvlan0 type macvlan mode source # ip link set link dev macvlan0 type macvlan macaddr add 01:00:00:00:00:00 RTNETLINK answers: Cannot assign requested address # ip link set link dev macvlan0 type macvlan macaddr set 01:00:00:00:00:00 # ip -d link show macvlan0 5: macvlan0@eth0: <BROADCAST,MULTICAST,DYNAMIC,UP,LOWER_UP> mtu 1500 ... link/ether 2e:ac:fd:2d:69:f8 brd ff:ff:ff:ff:ff:ff promiscuity 0 macvlan mode source remotes (1) 01:00:00:00:00:00 numtxqueues 1 ...
With this change, the 'set' command will (rightly) fail in the same way as the 'add' command.
Signed-off-by: Alvin Šipraga alsi@bang-olufsen.dk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/macvlan.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 349123592af0f..e226a96da3a39 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1230,6 +1230,9 @@ static void macvlan_port_destroy(struct net_device *dev) static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + struct nlattr *nla, *head; + int rem, len; + if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; @@ -1277,6 +1280,20 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; }
+ if (data[IFLA_MACVLAN_MACADDR_DATA]) { + head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]); + len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); + + nla_for_each_attr(nla, head, len, rem) { + if (nla_type(nla) != IFLA_MACVLAN_MACADDR || + nla_len(nla) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(nla))) + return -EADDRNOTAVAIL; + } + } + if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL;
@@ -1333,10 +1350,6 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]);
nla_for_each_attr(nla, head, len, rem) { - if (nla_type(nla) != IFLA_MACVLAN_MACADDR || - nla_len(nla) != ETH_ALEN) - continue; - addr = nla_data(nla); ret = macvlan_hash_add_source(vlan, addr); if (ret)
From: Sumera Priyadarsini sylphrenadin@gmail.com
[ Upstream commit 989e4da042ca4a56bbaca9223d1a93639ad11e17 ]
Every iteration of for_each_available_child_of_node() decrements reference count of the previous node, however when control is transferred from the middle of the loop, as in the case of a return or break or goto, there is no decrement thus ultimately resulting in a memory leak.
Fix a potential memory leak in gianfar.c by inserting of_node_put() before the goto statement.
Issue found with Coccinelle.
Signed-off-by: Sumera Priyadarsini sylphrenadin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/freescale/gianfar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index cf2d1e846a692..8243501c37574 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -844,8 +844,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) continue;
err = gfar_parse_group(child, priv, model); - if (err) + if (err) { + of_node_put(child); goto err_grp_init; + } } } else { /* SQ_SG_MODE */ err = gfar_parse_group(np, priv, model);
From: Athira Rajeev atrajeev@linux.vnet.ibm.com
[ Upstream commit 17899eaf88d689529b866371344c8f269ba79b5f ]
Performance monitor interrupt handler checks if any counter has overflown and calls record_and_restart() in core-book3s which invokes perf_event_overflow() to record the sample information. Apart from creating sample, perf_event_overflow() also does the interrupt and period checks via perf_event_account_interrupt().
Currently we record information only if the SIAR (Sampled Instruction Address Register) valid bit is set (using siar_valid() check) and hence the interrupt check.
But it is possible that we do sampling for some events that are not generating valid SIAR, and hence there is no chance to disable the event if interrupts are more than max_samples_per_tick. This leads to soft lockup.
Fix this by adding perf_event_account_interrupt() in the invalid SIAR code path for a sampling event. ie if SIAR is invalid, just do interrupt check and don't record the sample information.
Reported-by: Alexey Kardashevskiy aik@ozlabs.ru Signed-off-by: Athira Rajeev atrajeev@linux.vnet.ibm.com Tested-by: Alexey Kardashevskiy aik@ozlabs.ru Signed-off-by: Michael Ellerman mpe@ellerman.id.au Link: https://lore.kernel.org/r/1596717992-7321-1-git-send-email-atrajeev@linux.vn... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/perf/core-book3s.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4004dbdab9c7b..d407b73298171 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2087,6 +2087,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } }
linux-stable-mirror@lists.linaro.org