From: Al Viro viro@zeniv.linux.org.uk
[ Upstream commit f8d4f44df056c5b504b0d49683fb7279218fd207 ]
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/eventpoll.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8107e06d7f6f5..5207dfc85b786 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1522,6 +1522,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, RCU_INIT_POINTER(epi->ws, NULL); }
+ /* Add the current item to the list of active epoll hook for this file */ + spin_lock(&tfile->f_lock); + list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); + spin_unlock(&tfile->f_lock); + + /* + * Add the current item to the RB tree. All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ + ep_rbtree_insert(ep, epi); + + /* now check if we've created too many backpaths */ + error = -EINVAL; + if (full_check && reverse_path_check()) + goto error_remove_epi; + /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); @@ -1544,22 +1560,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (epi->nwait < 0) goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */ - spin_lock(&tfile->f_lock); - list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); - spin_unlock(&tfile->f_lock); - - /* - * Add the current item to the RB tree. All RB tree operations are - * protected by "mtx", and ep_insert() is called with "mtx" held. - */ - ep_rbtree_insert(ep, epi); - - /* now check if we've created too many backpaths */ - error = -EINVAL; - if (full_check && reverse_path_check()) - goto error_remove_epi; - /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock);
@@ -1588,6 +1588,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return 0;
+error_unregister: + ep_unregister_pollwait(ep, epi); error_remove_epi: spin_lock(&tfile->f_lock); list_del_rcu(&epi->fllink); @@ -1595,9 +1597,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
rb_erase_cached(&epi->rbn, &ep->rbr);
-error_unregister: - ep_unregister_pollwait(ep, epi); - /* * We need to do this because an event could have been arrived on some * allocated wait queue. Note that we don't care about the ep->ovflist
From: Al Viro viro@zeniv.linux.org.uk
[ Upstream commit 18306c404abe18a0972587a6266830583c60c928 ]
removes the need to clear it, along with the races.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/eventpoll.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5207dfc85b786..82ab9a25f12f2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -218,8 +218,7 @@ struct eventpoll { struct file *file;
/* used to optimize loop detection check */ - struct list_head visited_list_link; - int visited; + u64 gen;
#ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ @@ -274,6 +273,8 @@ static long max_user_watches __read_mostly; */ static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0; + /* Used to check for epoll file descriptor inclusion loops */ static struct nested_calls poll_loop_ncalls;
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */ -static LIST_HEAD(visited_list); - /* * List of files with newly added links, where we may need to limit the number * of emanating paths. Protected by the epmutex. @@ -1971,13 +1969,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1); - ep->visited = 1; - list_add(&ep->visited_list_link, &visited_list); + ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; - if (ep_tovisit->visited) + if (ep_tovisit->gen == loop_check_gen) continue; error = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, epi->ffd.file, @@ -2018,18 +2015,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - int ret; - struct eventpoll *ep_cur, *ep_next; - - ret = ep_call_nested(&poll_loop_ncalls, + return ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, file, ep, current); - /* clear visited list */ - list_for_each_entry_safe(ep_cur, ep_next, &visited_list, - visited_list_link) { - ep_cur->visited = 0; - list_del(&ep_cur->visited_list_link); - } - return ret; }
static void clear_tfile_check_list(void) @@ -2199,6 +2186,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = epoll_mutex_lock(&epmutex, 0, nonblock); if (error) goto error_tgt_fput; + loop_check_gen++; full_check = 1; if (is_file_epoll(tf.file)) { error = -ELOOP; @@ -2262,6 +2250,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error_tgt_fput: if (full_check) { clear_tfile_check_list(); + loop_check_gen++; mutex_unlock(&epmutex); }
From: Al Viro viro@zeniv.linux.org.uk
[ Upstream commit fe0a916c1eae8e17e86c3753d13919177d63ed7e ]
Checking for the lack of epitems refering to the epoll we want to insert into is not enough; we might have an insertion of that epoll into another one that has already collected the set of files to recheck for excessive reverse paths, but hasn't gotten to creating/inserting the epitem for it.
However, any such insertion in progress can be detected - it will update the generation count in our epoll when it's done looking through it for files to check. That gets done under ->mtx of our epoll and that allows us to detect that safely.
We are *not* holding epmutex here, so the generation count is not stable. However, since both the update of ep->gen by loop check and (later) insertion into ->f_ep_link are done with ep->mtx held, we are fine - the sequence is grab epmutex bump loop_check_gen ... grab tep->mtx // 1 tep->gen = loop_check_gen ... drop tep->mtx // 2 ... grab tep->mtx // 3 ... insert into ->f_ep_link ... drop tep->mtx // 4 bump loop_check_gen drop epmutex and if the fastpath check in another thread happens for that eventpoll, it can come * before (1) - in that case fastpath is just fine * after (4) - we'll see non-empty ->f_ep_link, slow path taken * between (2) and (3) - loop_check_gen is stable, with ->mtx providing barriers and we end up taking slow path.
Note that ->f_ep_link emptiness check is slightly racy - we are protected against insertions into that list, but removals can happen right under us. Not a problem - in the worst case we'll end up taking a slow path for no good reason.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/eventpoll.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 82ab9a25f12f2..16313180e4c16 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2181,6 +2181,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { if (!list_empty(&f.file->f_ep_links) || + ep->gen == loop_check_gen || is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epmutex, 0, nonblock);
From: Philip Yang Philip.Yang@amd.com
[ Upstream commit 1d0e16ac1a9e800598dcfa5b6bc53b704a103390 ]
Set ttm->sg to NULL after kfree, to avoid memory corruption backtrace:
[ 420.932812] kernel BUG at /build/linux-do9eLF/linux-4.15.0/mm/slub.c:295! [ 420.934182] invalid opcode: 0000 [#1] SMP NOPTI [ 420.935445] Modules linked in: xt_conntrack ipt_MASQUERADE [ 420.951332] Hardware name: Dell Inc. PowerEdge R7525/0PYVT1, BIOS 1.5.4 07/09/2020 [ 420.952887] RIP: 0010:__slab_free+0x180/0x2d0 [ 420.954419] RSP: 0018:ffffbe426291fa60 EFLAGS: 00010246 [ 420.955963] RAX: ffff9e29263e9c30 RBX: ffff9e29263e9c30 RCX: 000000018100004b [ 420.957512] RDX: ffff9e29263e9c30 RSI: fffff3d33e98fa40 RDI: ffff9e297e407a80 [ 420.959055] RBP: ffffbe426291fb00 R08: 0000000000000001 R09: ffffffffc0d39ade [ 420.960587] R10: ffffbe426291fb20 R11: ffff9e49ffdd4000 R12: ffff9e297e407a80 [ 420.962105] R13: fffff3d33e98fa40 R14: ffff9e29263e9c30 R15: ffff9e2954464fd8 [ 420.963611] FS: 00007fa2ea097780(0000) GS:ffff9e297e840000(0000) knlGS:0000000000000000 [ 420.965144] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 420.966663] CR2: 00007f16bfffefb8 CR3: 0000001ff0c62000 CR4: 0000000000340ee0 [ 420.968193] Call Trace: [ 420.969703] ? __page_cache_release+0x3c/0x220 [ 420.971294] ? amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu] [ 420.972789] kfree+0x168/0x180 [ 420.974353] ? amdgpu_ttm_tt_set_user_pages+0x64/0xc0 [amdgpu] [ 420.975850] ? kfree+0x168/0x180 [ 420.977403] amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu] [ 420.978888] ttm_tt_unpopulate.part.10+0x53/0x60 [amdttm] [ 420.980357] ttm_tt_destroy.part.11+0x4f/0x60 [amdttm] [ 420.981814] ttm_tt_destroy+0x13/0x20 [amdttm] [ 420.983273] ttm_bo_cleanup_memtype_use+0x36/0x80 [amdttm] [ 420.984725] ttm_bo_release+0x1c9/0x360 [amdttm] [ 420.986167] amdttm_bo_put+0x24/0x30 [amdttm] [ 420.987663] amdgpu_bo_unref+0x1e/0x30 [amdgpu] [ 420.989165] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x9ca/0xb10 [amdgpu] [ 420.990666] kfd_ioctl_alloc_memory_of_gpu+0xef/0x2c0 [amdgpu]
Signed-off-by: Philip Yang Philip.Yang@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e59c01a83dace..9a3267f06376f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1052,6 +1052,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
release_sg: kfree(ttm->sg); + ttm->sg = NULL; return r; }
From: Al Viro viro@zeniv.linux.org.uk
[ Upstream commit 3701cb59d892b88d569427586f01491552f377b1 ]
or get freed, for that matter, if it's a long (separately stored) name.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/eventpoll.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 16313180e4c16..4df61129566d4 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1448,7 +1448,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi) { - const char *name; + struct name_snapshot n; struct wakeup_source *ws;
if (!epi->ep->ws) { @@ -1457,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi) return -ENOMEM; }
- name = epi->ffd.file->f_path.dentry->d_name.name; - ws = wakeup_source_register(NULL, name); + take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); + ws = wakeup_source_register(NULL, n.name.name); + release_dentry_name_snapshot(&n);
if (!ws) return -ENOMEM;
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 313b085851c13ca08320372a05a7047ea25d3dd4 ]
We need to move the closing of the src_device out of all the device replace locking, but we definitely want to zero out the superblock before we commit the last time to make sure the device is properly removed. Handle this by pushing btrfs_scratch_superblocks into btrfs_dev_replace_finishing, and then later on we'll move the src_device closing and freeing stuff where we need it to be.
Reviewed-by: Nikolay Borisov nborisov@suse.com Signed-off-by: Josef Bacik josef@toxicpanda.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/volumes.c | 12 +++--------- fs/btrfs/volumes.h | 3 +++ 3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index db93909b25e08..7cf48aeb6f14e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -745,6 +745,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, /* replace the sysfs entry */ btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device); btrfs_sysfs_update_devid(tgt_device); + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state)) + btrfs_scratch_superblocks(fs_info, src_device->bdev, + src_device->name->str); btrfs_rm_dev_replace_free_srcdev(src_device);
/* write back the superblocks */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 956eb0d6bc584..8b5f666a3ea66 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; }
-static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, - struct block_device *bdev, - const char *device_path) +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path) { struct btrfs_super_block *disk_super; int copy_num; @@ -2224,12 +2224,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
- if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { - /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(fs_info, srcdev->bdev, - srcdev->name->str); - } - btrfs_close_bdev(srcdev); synchronize_rcu(); btrfs_free_device(srcdev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 75af2334b2e37..83862e27f5663 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path);
int btrfs_bg_type_to_factor(u64 flags); const char *btrfs_bg_type_to_raid_name(u64 flags);
From: Jens Axboe axboe@kernel.dk
[ Upstream commit fad8e0de4426a776c9bcb060555e7c09e2d08db6 ]
syzbot reports a potential lock deadlock between the normal IO path and ->show_fdinfo():
====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc6-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.2/19710 is trying to acquire lock: ffff888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 fs/io_uring.c:3296
but task is already holding lock: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&ctx->uring_lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 __io_uring_show_fdinfo fs/io_uring.c:8417 [inline] io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460 seq_show+0x4a8/0x700 fs/proc/fd.c:65 seq_read+0x432/0x1070 fs/seq_file.c:208 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #1 (&p->lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 seq_read+0x61/0x1070 fs/seq_file.c:155 pde_read fs/proc/inode.c:306 [inline] proc_reg_read+0x221/0x300 fs/proc/inode.c:318 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (sb_writers#4){.+.+}-{0:0}: check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x228/0x450 fs/super.c:1672 io_write+0x6b5/0xb30 fs/io_uring.c:3296 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254 io_submit_sqe fs/io_uring.c:6324 [inline] io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of: sb_writers#4 --> &p->lock --> &ctx->uring_lock
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&ctx->uring_lock); lock(&p->lock); lock(&ctx->uring_lock); lock(sb_writers#4);
*** DEADLOCK ***
1 lock held by syz-executor.2/19710: #0: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348
stack backtrace: CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827 check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x228/0x450 fs/super.c:1672 io_write+0x6b5/0xb30 fs/io_uring.c:3296 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254 io_submit_sqe fs/io_uring.c:6324 [inline] io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45e179 Code: 3d b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 0b b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f1194e74c78 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa RAX: ffffffffffffffda RBX: 00000000000082c0 RCX: 000000000045e179 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000004 RBP: 000000000118cf98 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cf4c R13: 00007ffd1aa5756f R14: 00007f1194e759c0 R15: 000000000118cf4c
Fix this by just not diving into details if we fail to trylock the io_uring mutex. We know the ctx isn't going away during this operation, but we cannot safely iterate buffers/files/personalities if we don't hold the io_uring mutex.
Reported-by: syzbot+2f8fa4e860edc3066aba@syzkaller.appspotmail.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/io_uring.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 1d5640cc2a488..25017418348ca 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7994,11 +7994,19 @@ static int io_uring_show_cred(int id, void *p, void *data)
static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { + bool has_lock; int i;
- mutex_lock(&ctx->uring_lock); + /* + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, + * since fdinfo case grabs it in the opposite direction of normal use + * cases. If we fail to get the lock, we just don't iterate any + * structures that could be going away outside the io_uring mutex. + */ + has_lock = mutex_trylock(&ctx->uring_lock); + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); - for (i = 0; i < ctx->nr_user_files; i++) { + for (i = 0; has_lock && i < ctx->nr_user_files; i++) { struct fixed_file_table *table; struct file *f;
@@ -8010,13 +8018,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) seq_printf(m, "%5u: <none>\n", i); } seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); - for (i = 0; i < ctx->nr_user_bufs; i++) { + for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, (unsigned int) buf->len); } - if (!idr_is_empty(&ctx->personality_idr)) { + if (has_lock && !idr_is_empty(&ctx->personality_idr)) { seq_printf(m, "Personalities:\n"); idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); } @@ -8031,7 +8039,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) req->task->task_works != NULL); } spin_unlock_irq(&ctx->completion_lock); - mutex_unlock(&ctx->uring_lock); + if (has_lock) + mutex_unlock(&ctx->uring_lock); }
static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
From: Sudheesh Mavila sudheesh.mavila@amd.com
[ Upstream commit 97cf32996c46d9935cc133d910a75fb687dd6144 ]
SMU10_UMD_PSTATE_PEAK_FCLK value should not be used to set the DPM.
Suggested-by: Evan Quan evan.quan@amd.com Reviewed-by: Evan Quan evan.quan@amd.com Signed-off-by: Sudheesh Mavila sudheesh.mavila@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 9ee8cf8267c88..43f7adff6cb74 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -563,6 +563,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, struct smu10_hwmgr *data = hwmgr->backend; uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; + uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; + uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1;
if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -676,13 +678,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - SMU10_UMD_PSTATE_PEAK_FCLK : + data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : min_mclk, NULL);
smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - SMU10_UMD_PSTATE_MIN_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -695,11 +697,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_PEAK_FCLK, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - SMU10_UMD_PSTATE_PEAK_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn,
From: Flora Cui flora.cui@amd.com
[ Upstream commit 898c7302f4de1d91065e80fc46552b3ec70894ff ]
max_caps might be 0, thus hdcp_work might be ZERO_SIZE_PTR
Signed-off-by: Flora Cui flora.cui@amd.com Reviewed-by: Feifei Xu Feifei.Xu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 949d10ef83040..6dd1f3f8d9903 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -568,7 +568,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct int i = 0;
hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL); - if (hdcp_work == NULL) + if (ZERO_OR_NULL_PTR(hdcp_work)) return NULL;
hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL);
From: Zack Rusin zackr@vmware.com
[ Upstream commit f54c4442893b8dfbd3aff8e903c54dfff1aef990 ]
ttm_mem_type_manager_func.get_node was changed to return -ENOSPC instead of setting the node pointer to NULL. Unfortunately vmwgfx still had two places where it was explicitly converting -ENOSPC to 0 causing regressions. This fixes those spots by allowing -ENOSPC to be returned. That seems to fix recent regressions with vmwgfx.
Signed-off-by: Zack Rusin zackr@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Reviewed-by: Martin Krastev krastevm@vmware.com Sigend-off-by: Roland Scheidegger sroland@vmware.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 7da752ca1c34b..b93c558dd86e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -57,7 +57,7 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); if (id < 0) - return (id != -ENOMEM ? 0 : id); + return id;
spin_lock(&gman->lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c index b7c816ba71663..c8b9335bccd8d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -95,7 +95,7 @@ static int vmw_thp_get_node(struct ttm_mem_type_manager *man, mem->start = node->start; }
- return 0; + return ret; }
Not entirely sure how the patches for autosel were selected, but this patch is no good for 5.8, since the patch which introduced the breakage in the first place is only in 5.9 (in particular it was 58e4d686d456c3e356439ae160ff4a0728940b8e, drm/ttm: cleanup ttm_mem_type_manager_func.get_node interface v3), and at least I don't see that one being backported to 5.8.
Roland
Am 05.10.20 um 16:44 schrieb Sasha Levin:
From: Zack Rusin zackr@vmware.com
[ Upstream commit f54c4442893b8dfbd3aff8e903c54dfff1aef990 ]
ttm_mem_type_manager_func.get_node was changed to return -ENOSPC instead of setting the node pointer to NULL. Unfortunately vmwgfx still had two places where it was explicitly converting -ENOSPC to 0 causing regressions. This fixes those spots by allowing -ENOSPC to be returned. That seems to fix recent regressions with vmwgfx.
Signed-off-by: Zack Rusin zackr@vmware.com Reviewed-by: Roland Scheidegger sroland@vmware.com Reviewed-by: Martin Krastev krastevm@vmware.com Sigend-off-by: Roland Scheidegger sroland@vmware.com Signed-off-by: Sasha Levin sashal@kernel.org
drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 7da752ca1c34b..b93c558dd86e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -57,7 +57,7 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man, id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); if (id < 0)
return (id != -ENOMEM ? 0 : id);
return id;
spin_lock(&gman->lock); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c index b7c816ba71663..c8b9335bccd8d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -95,7 +95,7 @@ static int vmw_thp_get_node(struct ttm_mem_type_manager *man, mem->start = node->start; }
- return 0;
- return ret;
}
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit a466c85edc6fbe845facc8f57c408c544f42899e ]
When closing and freeing the source device we could end up doing our final blkdev_put() on the bdev, which will grab the bd_mutex. As such we want to be holding as few locks as possible, so move this call outside of the dev_replace->lock_finishing_cancel_unmount lock. Since we're modifying the fs_devices we need to make sure we're holding the uuid_mutex here, so take that as well.
There's a report from syzbot probably hitting one of the cases where the bd_mutex and device_list_mutex are taken in the wrong order, however it's not with device replace, like this patch fixes. As there's no reproducer available so far, we can't verify the fix.
https://lore.kernel.org/lkml/000000000000fc04d105afcf86d7@google.com/ dashboard link: https://syzkaller.appspot.com/bug?extid=84a0634dc5d21d488419
WARNING: possible circular locking dependency detected 5.9.0-rc5-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.0/6878 is trying to acquire lock: ffff88804c17d780 (&bdev->bd_mutex){+.+.}-{3:3}, at: blkdev_put+0x30/0x520 fs/block_dev.c:1804
but task is already holding lock: ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #4 (&fs_devs->device_list_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 btrfs_finish_chunk_alloc+0x281/0xf90 fs/btrfs/volumes.c:5255 btrfs_create_pending_block_groups+0x2f3/0x700 fs/btrfs/block-group.c:2109 __btrfs_end_transaction+0xf5/0x690 fs/btrfs/transaction.c:916 find_free_extent_update_loop fs/btrfs/extent-tree.c:3807 [inline] find_free_extent+0x23b7/0x2e60 fs/btrfs/extent-tree.c:4127 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370 do_writepages+0xec/0x290 mm/page-writeback.c:2352 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894 wb_do_writeback fs/fs-writeback.c:2039 [inline] wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
-> #3 (sb_internal#2){.+.+}-{0:0}: percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x234/0x470 fs/super.c:1672 sb_start_intwrite include/linux/fs.h:1690 [inline] start_transaction+0xbe7/0x1170 fs/btrfs/transaction.c:624 find_free_extent_update_loop fs/btrfs/extent-tree.c:3789 [inline] find_free_extent+0x25e1/0x2e60 fs/btrfs/extent-tree.c:4127 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370 do_writepages+0xec/0x290 mm/page-writeback.c:2352 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894 wb_do_writeback fs/fs-writeback.c:2039 [inline] wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
-> #2 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}: __flush_work+0x60e/0xac0 kernel/workqueue.c:3041 wb_shutdown+0x180/0x220 mm/backing-dev.c:355 bdi_unregister+0x174/0x590 mm/backing-dev.c:872 del_gendisk+0x820/0xa10 block/genhd.c:933 loop_remove drivers/block/loop.c:2192 [inline] loop_control_ioctl drivers/block/loop.c:2291 [inline] loop_control_ioctl+0x3b1/0x480 drivers/block/loop.c:2257 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #1 (loop_ctl_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 lo_open+0x19/0xd0 drivers/block/loop.c:1893 __blkdev_get+0x759/0x1aa0 fs/block_dev.c:1507 blkdev_get fs/block_dev.c:1639 [inline] blkdev_open+0x227/0x300 fs/block_dev.c:1753 do_dentry_open+0x4b9/0x11b0 fs/open.c:817 do_open fs/namei.c:3251 [inline] path_openat+0x1b9a/0x2730 fs/namei.c:3368 do_filp_open+0x17e/0x3c0 fs/namei.c:3395 do_sys_openat2+0x16d/0x420 fs/open.c:1168 do_sys_open fs/open.c:1184 [inline] __do_sys_open fs/open.c:1192 [inline] __se_sys_open fs/open.c:1188 [inline] __x64_sys_open+0x119/0x1c0 fs/open.c:1188 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&bdev->bd_mutex){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006 __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 blkdev_put+0x30/0x520 fs/block_dev.c:1804 btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline] btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline] btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline] close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149 generic_shutdown_super+0x144/0x370 fs/super.c:464 kill_anon_super+0x36/0x60 fs/super.c:1108 btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265 deactivate_locked_super+0x94/0x160 fs/super.c:335 deactivate_super+0xad/0xd0 fs/super.c:366 cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of: &bdev->bd_mutex --> sb_internal#2 --> &fs_devs->device_list_mutex
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&fs_devs->device_list_mutex); lock(sb_internal#2); lock(&fs_devs->device_list_mutex); lock(&bdev->bd_mutex);
*** DEADLOCK ***
3 locks held by syz-executor.0/6878: #0: ffff88809070c0e0 (&type->s_umount_key#70){++++}-{3:3}, at: deactivate_super+0xa5/0xd0 fs/super.c:365 #1: ffffffff8a5b37a8 (uuid_mutex){+.+.}-{3:3}, at: btrfs_close_devices+0x23/0x1f0 fs/btrfs/volumes.c:1178 #2: ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159
stack backtrace: CPU: 0 PID: 6878 Comm: syz-executor.0 Not tainted 5.9.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827 check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006 __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 blkdev_put+0x30/0x520 fs/block_dev.c:1804 btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline] btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline] btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline] close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149 generic_shutdown_super+0x144/0x370 fs/super.c:464 kill_anon_super+0x36/0x60 fs/super.c:1108 btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265 deactivate_locked_super+0x94/0x160 fs/super.c:335 deactivate_super+0xad/0xd0 fs/super.c:366 cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x460027 RSP: 002b:00007fff59216328 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000000000076035 RCX: 0000000000460027 RDX: 0000000000403188 RSI: 0000000000000002 RDI: 00007fff592163d0 RBP: 0000000000000333 R08: 0000000000000000 R09: 000000000000000b R10: 0000000000000005 R11: 0000000000000246 R12: 00007fff59217460 R13: 0000000002df2a60 R14: 0000000000000000 R15: 00007fff59217460
Signed-off-by: Josef Bacik josef@toxicpanda.com [ add syzbot reference ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/dev-replace.c | 3 ++- fs/btrfs/volumes.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7cf48aeb6f14e..62aece1bb8ec1 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -748,7 +748,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state)) btrfs_scratch_superblocks(fs_info, src_device->bdev, src_device->name->str); - btrfs_rm_dev_replace_free_srcdev(src_device);
/* write back the superblocks */ trans = btrfs_start_transaction(root, 0); @@ -757,6 +756,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+ btrfs_rm_dev_replace_free_srcdev(src_device); + return 0; }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8b5f666a3ea66..79e9a80bd37a0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2224,6 +2224,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
+ mutex_lock(&uuid_mutex); + btrfs_close_bdev(srcdev); synchronize_rcu(); btrfs_free_device(srcdev); @@ -2252,6 +2254,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) close_fs_devices(fs_devices); free_fs_devices(fs_devices); } + mutex_unlock(&uuid_mutex); }
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
From: Lu Baolu baolu.lu@linux.intel.com
[ Upstream commit 1a3f2fd7fc4e8f24510830e265de2ffb8e3300d2 ]
Lock(&iommu->lock) without disabling irq causes lockdep warnings.
[ 12.703950] ======================================================== [ 12.703962] WARNING: possible irq lock inversion dependency detected [ 12.703975] 5.9.0-rc6+ #659 Not tainted [ 12.703983] -------------------------------------------------------- [ 12.703995] systemd-udevd/284 just changed the state of lock: [ 12.704007] ffffffffbd6ff4d8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.57+0x2e/0x90 [ 12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past: [ 12.704043] (&iommu->lock){+.+.}-{2:2} [ 12.704045]
and interrupts could create inverse lock ordering between them.
[ 12.704073] other info that might help us debug this: [ 12.704085] Possible interrupt unsafe locking scenario:
[ 12.704097] CPU0 CPU1 [ 12.704106] ---- ---- [ 12.704115] lock(&iommu->lock); [ 12.704123] local_irq_disable(); [ 12.704134] lock(device_domain_lock); [ 12.704146] lock(&iommu->lock); [ 12.704158] <Interrupt> [ 12.704164] lock(device_domain_lock); [ 12.704174] *** DEADLOCK ***
Signed-off-by: Lu Baolu baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20200927062428.13713-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fbe0b0cc56edf..24a84d294fd01 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2617,7 +2617,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, }
/* Setup the PASID entry for requests without PASID: */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, flags); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2627,7 +2627,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, flags); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev);
linux-stable-mirror@lists.linaro.org