The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ab9b2c7b32e6be53cac2e23f5b2db66815a7d972 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Thu, 13 Feb 2020 10:47:30 -0500
Subject: [PATCH] btrfs: handle logged extent failure properly
If we're allocating a logged extent we attempt to insert an extent
record for the file extent directly. We increase
space_info->bytes_reserved, because the extent entry addition will call
btrfs_update_block_group(), which will convert the ->bytes_reserved to
->bytes_used. However if we fail at any point while inserting the
extent entry we will bail and leave space on ->bytes_reserved, which
will trigger a WARN_ON() on umount. Fix this by pinning the space if we
fail to insert, which is what happens in every other failure case that
involves adding the extent entry.
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 136fffb76428..7eef91d6c2b6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4422,7 +4422,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
offset, ins, 1);
if (ret)
- btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1);
+ btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
btrfs_put_block_group(block_group);
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 900fc60df22748dbc28e4970838e8f7b8f1013ce Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Date: Thu, 5 Mar 2020 01:17:27 +0530
Subject: [PATCH] remoteproc: qcom_q6v5_mss: Don't reassign mpss region on
shutdown
Trying to reclaim mpss memory while the mba is not running causes the
system to crash on devices with security fuses blown, so leave it
assigned to the remote on shutdown and recover it on a subsequent boot.
Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Signed-off-by: Sibi Sankar <sibis(a)codeaurora.org>
Tested-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Link: https://lore.kernel.org/r/20200304194729.27979-2-sibis@codeaurora.org
Signed-off-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
index a1cc9cbe038f..9fed1b1c203d 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -1001,11 +1001,6 @@ static void q6v5_mba_reclaim(struct q6v5 *qproc)
writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
}
- ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
- false, qproc->mpss_phys,
- qproc->mpss_size);
- WARN_ON(ret);
-
q6v5_reset_assert(qproc);
q6v5_clk_disable(qproc->dev, qproc->reset_clks,
@@ -1095,6 +1090,14 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K);
}
+ /**
+ * In case of a modem subsystem restart on secure devices, the modem
+ * memory can be reclaimed only after MBA is loaded. For modem cold
+ * boot this will be a nop
+ */
+ q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false,
+ qproc->mpss_phys, qproc->mpss_size);
+
mpss_reloc = relocate ? min_addr : qproc->mpss_phys;
qproc->mpss_reloc = mpss_reloc;
/* Load firmware segments */
@@ -1184,8 +1187,16 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
void *ptr = rproc_da_to_va(rproc, segment->da, segment->size);
/* Unlock mba before copying segments */
- if (!qproc->dump_mba_loaded)
+ if (!qproc->dump_mba_loaded) {
ret = q6v5_mba_load(qproc);
+ if (!ret) {
+ /* Reset ownership back to Linux to copy segments */
+ ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
+ false,
+ qproc->mpss_phys,
+ qproc->mpss_size);
+ }
+ }
if (!ptr || ret)
memset(dest, 0xff, segment->size);
@@ -1196,8 +1207,14 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
/* Reclaim mba after copying segments */
if (qproc->dump_segment_mask == qproc->dump_complete_mask) {
- if (qproc->dump_mba_loaded)
+ if (qproc->dump_mba_loaded) {
+ /* Try to reset ownership back to Q6 */
+ q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
+ true,
+ qproc->mpss_phys,
+ qproc->mpss_size);
q6v5_mba_reclaim(qproc);
+ }
}
}
@@ -1237,10 +1254,6 @@ static int q6v5_start(struct rproc *rproc)
return 0;
reclaim_mpss:
- xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
- false, qproc->mpss_phys,
- qproc->mpss_size);
- WARN_ON(xfermemop_ret);
q6v5_mba_reclaim(qproc);
return ret;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f0cc2cd70164efe8f75c5d99560f0f69969c72e4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 28 Feb 2020 13:04:36 +0000
Subject: [PATCH] Btrfs: fix crash during unmount due to race with delayed
inode workers
During unmount we can have a job from the delayed inode items work queue
still running, that can lead to at least two bad things:
1) A crash, because the worker can try to create a transaction just
after the fs roots were freed;
2) A transaction leak, because the worker can create a transaction
before the fs roots are freed and just after we committed the last
transaction and after we stopped the transaction kthread.
A stack trace example of the crash:
[79011.691214] kernel BUG at lib/radix-tree.c:982!
[79011.692056] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
[79011.693180] CPU: 3 PID: 1394 Comm: kworker/u8:2 Tainted: G W 5.6.0-rc2-btrfs-next-54 #2
(...)
[79011.696789] Workqueue: btrfs-delayed-meta btrfs_work_helper [btrfs]
[79011.697904] RIP: 0010:radix_tree_tag_set+0xe7/0x170
(...)
[79011.702014] RSP: 0018:ffffb3c84a317ca0 EFLAGS: 00010293
[79011.702949] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[79011.704202] RDX: ffffb3c84a317cb0 RSI: ffffb3c84a317ca8 RDI: ffff8db3931340a0
[79011.705463] RBP: 0000000000000005 R08: 0000000000000005 R09: ffffffff974629d0
[79011.706756] R10: ffffb3c84a317bc0 R11: 0000000000000001 R12: ffff8db393134000
[79011.708010] R13: ffff8db3931340a0 R14: ffff8db393134068 R15: 0000000000000001
[79011.709270] FS: 0000000000000000(0000) GS:ffff8db3b6a00000(0000) knlGS:0000000000000000
[79011.710699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[79011.711710] CR2: 00007f22c2a0a000 CR3: 0000000232ad4005 CR4: 00000000003606e0
[79011.712958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[79011.714205] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[79011.715448] Call Trace:
[79011.715925] record_root_in_trans+0x72/0xf0 [btrfs]
[79011.716819] btrfs_record_root_in_trans+0x4b/0x70 [btrfs]
[79011.717925] start_transaction+0xdd/0x5c0 [btrfs]
[79011.718829] btrfs_async_run_delayed_root+0x17e/0x2b0 [btrfs]
[79011.719915] btrfs_work_helper+0xaa/0x720 [btrfs]
[79011.720773] process_one_work+0x26d/0x6a0
[79011.721497] worker_thread+0x4f/0x3e0
[79011.722153] ? process_one_work+0x6a0/0x6a0
[79011.722901] kthread+0x103/0x140
[79011.723481] ? kthread_create_worker_on_cpu+0x70/0x70
[79011.724379] ret_from_fork+0x3a/0x50
(...)
The following diagram shows a sequence of steps that lead to the crash
during ummount of the filesystem:
CPU 1 CPU 2 CPU 3
btrfs_punch_hole()
btrfs_btree_balance_dirty()
btrfs_balance_delayed_items()
--> sees
fs_info->delayed_root->items
with value 200, which is greater
than
BTRFS_DELAYED_BACKGROUND (128)
and smaller than
BTRFS_DELAYED_WRITEBACK (512)
btrfs_wq_run_delayed_node()
--> queues a job for
fs_info->delayed_workers to run
btrfs_async_run_delayed_root()
btrfs_async_run_delayed_root()
--> job queued by CPU 1
--> starts picking and running
delayed nodes from the
prepare_list list
close_ctree()
btrfs_delete_unused_bgs()
btrfs_commit_super()
btrfs_join_transaction()
--> gets transaction N
btrfs_commit_transaction(N)
--> set transaction state
to TRANTS_STATE_COMMIT_START
btrfs_first_prepared_delayed_node()
--> picks delayed node X through
the prepared_list list
btrfs_run_delayed_items()
btrfs_first_delayed_node()
--> also picks delayed node X
but through the node_list
list
__btrfs_commit_inode_delayed_items()
--> runs all delayed items from
this node and drops the
node's item count to 0
through call to
btrfs_release_delayed_inode()
--> finishes running any remaining
delayed nodes
--> finishes transaction commit
--> stops cleaner and transaction threads
btrfs_free_fs_roots()
--> frees all roots and removes them
from the radix tree
fs_info->fs_roots_radix
btrfs_join_transaction()
start_transaction()
btrfs_record_root_in_trans()
record_root_in_trans()
radix_tree_tag_set()
--> crashes because
the root is not in
the radix tree
anymore
If the worker is able to call btrfs_join_transaction() before the unmount
task frees the fs roots, we end up leaking a transaction and all its
resources, since after the call to btrfs_commit_super() and stopping the
transaction kthread, we don't expect to have any transaction open anymore.
When this situation happens the worker has a delayed node that has no
more items to run, since the task calling btrfs_run_delayed_items(),
which is doing a transaction commit, picks the same node and runs all
its items first.
We can not wait for the worker to complete when running delayed items
through btrfs_run_delayed_items(), because we call that function in
several phases of a transaction commit, and that could cause a deadlock
because the worker calls btrfs_join_transaction() and the task doing the
transaction commit may have already set the transaction state to
TRANS_STATE_COMMIT_DOING.
Also it's not possible to get into a situation where only some of the
items of a delayed node are added to the fs/subvolume tree in the current
transaction and the remaining ones in the next transaction, because when
running the items of a delayed inode we lock its mutex, effectively
waiting for the worker if the worker is running the items of the delayed
node already.
Since this can only cause issues when unmounting a filesystem, fix it in
a simple way by waiting for any jobs on the delayed workers queue before
calling btrfs_commit_supper() at close_ctree(). This works because at this
point no one can call btrfs_btree_balance_dirty() or
btrfs_balance_delayed_items(), and if we end up waiting for any worker to
complete, btrfs_commit_super() will commit the transaction created by the
worker.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 1d32a07bb2d1..309516e6a968 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
+
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
+{
+ if (wq->high)
+ flush_workqueue(wq->high->normal_wq);
+
+ flush_workqueue(wq->normal->normal_wq);
+}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index a4434301d84d..3204daa51b95 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f01f19c76cf5..483b54077d01 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4067,6 +4067,19 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
btrfs_delete_unused_bgs(fs_info);
+ /*
+ * There might be existing delayed inode workers still running
+ * and holding an empty delayed inode item. We must wait for
+ * them to complete first because they can create a transaction.
+ * This happens when someone calls btrfs_balance_delayed_items()
+ * and then a transaction commit runs the same delayed nodes
+ * before any delayed worker has done something with the nodes.
+ * We must wait for any worker here and not at transaction
+ * commit time since that could cause a deadlock.
+ * This is a very rare case.
+ */
+ btrfs_flush_workqueue(fs_info->delayed_workers);
+
ret = btrfs_commit_super(fs_info);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);