When the process is migrated to a different cgroup (or in case of
writeback just starts submitting bios associated with a different
cgroup) bfq_merge_bio() can operate with stale cgroup information in
bic. Thus the bio can be merged to a request from a different cgroup or
it can result in merging of bfqqs for different cgroups or bfqqs of
already dead cgroups and causing possible use-after-free issues. Fix the
problem by updating cgroup information in bfq_merge_bio().
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 40 ++++++++++++++++++++++------------------
block/bfq-iosched.c | 11 +++++++++--
2 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index dbc117e00783..f6f5f156b9f2 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -729,30 +729,34 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data) {
+ struct bfq_queue *orig_bfqq = sync_bfqq;
+
+ /* Traverse the merge chain to bfqq we will be using */
+ while (sync_bfqq->new_bfqq)
+ sync_bfqq = sync_bfqq->new_bfqq;
+ /*
+ * Target bfqq got moved to a different cgroup or this process
+ * started submitting bios for different cgroup?
+ */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) {
/*
* Was the queue we use merged to a different queue?
- * Detach process from the queue as merge need not be
- * valid anymore. We cannot easily cancel the merge as
- * there may be other processes scheduled to this
- * queue.
+ * Detach process from the queue as the merge is not
+ * valid anymore. We cannot easily just cancel the
+ * merge (by clearing new_bfqq) as there may be other
+ * processes using this queue and holding refs to all
+ * queues below sync_bfqq->new_bfqq. Similarly if the
+ * merge already happened, we need to detach from bfqq
+ * now so that we cannot merge bio to a request from
+ * the old cgroup.
*/
- if (sync_bfqq->new_bfqq) {
- bfq_put_cooperator(sync_bfqq);
- bfq_release_process_ref(bfqd, sync_bfqq);
+ if (orig_bfqq != sync_bfqq || bfq_bfqq_coop(sync_bfqq)) {
+ bfq_put_cooperator(orig_bfqq);
+ bfq_release_process_ref(bfqd, orig_bfqq);
bic_set_bfqq(bic, NULL, 1);
return bfqg;
}
- /*
- * Moving bfqq that is shared with another process?
- * Split the queues at the nearest occasion as the
- * processes can be in different cgroups now.
- */
- if (bfq_bfqq_coop(sync_bfqq)) {
- bic->stably_merged = false;
- bfq_mark_bfqq_split_coop(sync_bfqq);
- }
+ /* We are the only user of this bfqq, just move it */
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
}
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 361d321b012a..8a088d77a0b6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2337,10 +2337,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
spin_lock_irq(&bfqd->lock);
- if (bic)
+ if (bic) {
+ /*
+ * Make sure cgroup info is uptodate for current process before
+ * considering the merge.
+ */
+ bfq_bic_update_cgroup(bic, bio);
+
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
- else
+ } else {
bfqd->bio_bfqq = NULL;
+ }
bfqd->bio_bic = bic;
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
--
2.31.1
When bfqq is shared by multiple processes it can happen that one of the
processes gets moved to a different cgroup (or just starts submitting IO
for different cgroup). In case that happens we need to split the merged
bfqq as otherwise we will have IO for multiple cgroups in one bfqq and
we will just account IO time to wrong entities etc.
Similarly if the bfqq is scheduled to merge with another bfqq but the
merge didn't happen yet, cancel the merge as it need not be valid
anymore.
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 25 ++++++++++++++++++++++++-
block/bfq-iosched.c | 2 +-
block/bfq-iosched.h | 1 +
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 24a5c5329bcd..dbc117e00783 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -730,8 +730,31 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (sync_bfqq) {
entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
+ if (entity->sched_data != &bfqg->sched_data) {
+ /*
+ * Was the queue we use merged to a different queue?
+ * Detach process from the queue as merge need not be
+ * valid anymore. We cannot easily cancel the merge as
+ * there may be other processes scheduled to this
+ * queue.
+ */
+ if (sync_bfqq->new_bfqq) {
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ return bfqg;
+ }
+ /*
+ * Moving bfqq that is shared with another process?
+ * Split the queues at the nearest occasion as the
+ * processes can be in different cgroups now.
+ */
+ if (bfq_bfqq_coop(sync_bfqq)) {
+ bic->stably_merged = false;
+ bfq_mark_bfqq_split_coop(sync_bfqq);
+ }
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ }
}
return bfqg;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0da47f2ca781..361d321b012a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5184,7 +5184,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index a73488eec8a4..6e250db2138e 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -976,6 +976,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
--
2.31.1
It can happen that the parent of a bfqq changes between the moment we
decide two queues are worth to merge (and set bic->stable_merge_bfqq)
and the moment bfq_setup_merge() is called. This can happen e.g. because
the process submitted IO for a different cgroup and thus bfqq got
reparented. It can even happen that the bfqq we are merging with has
parent cgroup that is already offline and going to be destroyed in which
case the merge can lead to use-after-free issues such as:
BUG: KASAN: use-after-free in __bfq_deactivate_entity+0x9cb/0xa50
Read of size 8 at addr ffff88800693c0c0 by task runc:[2:INIT]/10544
CPU: 0 PID: 10544 Comm: runc:[2:INIT] Tainted: G E 5.15.2-0.g5fb85fd-default #1 openSUSE Tumbleweed (unreleased) f1f3b891c72369aebecd2e43e4641a6358867c70
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x46/0x5a
print_address_description.constprop.0+0x1f/0x140
? __bfq_deactivate_entity+0x9cb/0xa50
kasan_report.cold+0x7f/0x11b
? __bfq_deactivate_entity+0x9cb/0xa50
__bfq_deactivate_entity+0x9cb/0xa50
? update_curr+0x32f/0x5d0
bfq_deactivate_entity+0xa0/0x1d0
bfq_del_bfqq_busy+0x28a/0x420
? resched_curr+0x116/0x1d0
? bfq_requeue_bfqq+0x70/0x70
? check_preempt_wakeup+0x52b/0xbc0
__bfq_bfqq_expire+0x1a2/0x270
bfq_bfqq_expire+0xd16/0x2160
? try_to_wake_up+0x4ee/0x1260
? bfq_end_wr_async_queues+0xe0/0xe0
? _raw_write_unlock_bh+0x60/0x60
? _raw_spin_lock_irq+0x81/0xe0
bfq_idle_slice_timer+0x109/0x280
? bfq_dispatch_request+0x4870/0x4870
__hrtimer_run_queues+0x37d/0x700
? enqueue_hrtimer+0x1b0/0x1b0
? kvm_clock_get_cycles+0xd/0x10
? ktime_get_update_offsets_now+0x6f/0x280
hrtimer_interrupt+0x2c8/0x740
Fix the problem by checking that the parent of the two bfqqs we are
merging in bfq_setup_merge() is the same.
Link: https://lore.kernel.org/linux-block/20211125172809.GC19572@quack2.suse.cz/
CC: stable(a)vger.kernel.org
Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-iosched.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 056399185c2f..0da47f2ca781 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2638,6 +2638,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;
+ /*
+ * Make sure merged queues belong to the same parent. Parents could
+ * have changed since the time we decided the two queues are suitable
+ * for merging.
+ */
+ if (new_bfqq->entity.parent != bfqq->entity.parent)
+ return NULL;
+
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);
--
2.31.1