Avoid data corruption by rejecting pass-through commands where
T_LENGTH is zero (No data is transferred) and the dma direction
is not DMA_NONE.
Reported-by: syzkaller <syzkaller(a)googlegroups.com>
Signed-off-by: George Kennedy <george.kennedy(a)oracle.com>
---
drivers/ata/libata-scsi.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1b84d55..d428392 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2859,6 +2859,12 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
goto invalid_fld;
}
+ /* if T_LENGTH is zero (No data is transferred), then dir should be DMA_NONE */
+ if ((cdb[2 + cdb_offset] & 3) == 0 && scmd->sc_data_direction != DMA_NONE) {
+ fp = 2 + cdb_offset;
+ goto invalid_fld;
+ }
+
if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0)
tf->protocol = ATA_PROT_NCQ_NODATA;
--
1.8.3.1
BFQ started crashing with 5.15-based kernels like:
BUG: KASAN: use-after-free in rb_erase (lib/rbtree.c:262 lib/rbtr
Read of size 8 at addr ffff888008193098 by task bash/1472
CPU: 0 PID: 1472 Comm: bash Tainted: G E 5.15.2-0.
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1
Call Trace:
rb_erase (lib/rbtree.c:262 lib/rbtree.c:445)
bfq_idle_extract (block/bfq-wf2q.c:356)
bfq_put_idle_entity (block/bfq-wf2q.c:660)
bfq_bfqq_served (block/bfq-wf2q.c:833)
bfq_dispatch_request (block/bfq-iosched.c:4870 block/bfq-iosched.
__blk_mq_do_dispatch_sched (block/blk-mq-sched.c:150)
__blk_mq_sched_dispatch_requests (block/blk-mq-sched.c:215 block/
blk_mq_sched_dispatch_requests (block/blk-mq-sched.c:360)
blk_mq_sched_insert_requests (include/linux/percpu-refcount.h:174
blk_mq_flush_plug_list (include/linux/list.h:282 block/blk-mq.c:1
blk_flush_plug_list (block/blk-core.c:1722)
blk_finish_plug (block/blk-core.c:1745 block/blk-core.c:1739)
read_pages (include/linux/list.h:282 mm/readahead.c:152)
page_cache_ra_unbounded (mm/readahead.c:212 (discriminator 2))
filemap_fault (mm/filemap.c:2982 mm/filemap.c:3074)
__do_fault (mm/memory.c:3858)
__handle_mm_fault (mm/memory.c:4182 mm/memory.c:4310 mm/memory.c:
handle_mm_fault (mm/memory.c:4801)
After some analysis we've found out that the culprit of the problem is
that some task is reparented from cgroup G to the root cgroup and G is
offlined. But a bfq_queue in task's IO context still points to G as its
parent and thus when task submits more IO, G is inserted into service
trees. Once the task exits and bfq_queue is destroyed, the last
reference to G is dropped as well and G is freed but it is still linked
from service trees causing use-after-free issues sometime later.
Fix the problem by tracking all bfq_queues that point to a particular
cgroup as their parent and reparent them when the cgroup is going
offline.
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Tested-by: Fabian Vogt <fvogt(a)suse.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 100 ++++++--------------------------------------
block/bfq-iosched.c | 54 ++++++++++++------------
block/bfq-iosched.h | 6 +++
3 files changed, 47 insertions(+), 113 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 24a5c5329bcd..519e6291e98e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -666,6 +666,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+ hlist_del(&bfqq->children_node);
bfqg_and_blkg_put(bfqq_group(bfqq));
if (entity->parent &&
@@ -678,6 +679,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
entity->sched_data = &bfqg->sched_data;
/* pin down bfqg and its associated blkg */
bfqg_and_blkg_get(bfqg);
+ hlist_add_head(&bfqq->children_node, &bfqg->children);
if (bfq_bfqq_busy(bfqq)) {
if (unlikely(!bfqd->nonrot_with_queueing))
@@ -810,68 +812,13 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
rcu_read_unlock();
}
-/**
- * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
- * @st: the service tree being flushed.
- */
-static void bfq_flush_idle_tree(struct bfq_service_tree *st)
-{
- struct bfq_entity *entity = st->first_idle;
-
- for (; entity ; entity = st->first_idle)
- __bfq_deactivate_entity(entity, false);
-}
-
-/**
- * bfq_reparent_leaf_entity - move leaf entity to the root_group.
- * @bfqd: the device data structure with the root group.
- * @entity: the entity to move, if entity is a leaf; or the parent entity
- * of an active leaf entity to move, if entity is not a leaf.
- */
-static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity,
- int ioprio_class)
+static void bfq_reparent_children(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
struct bfq_queue *bfqq;
- struct bfq_entity *child_entity = entity;
-
- while (child_entity->my_sched_data) { /* leaf not reached yet */
- struct bfq_sched_data *child_sd = child_entity->my_sched_data;
- struct bfq_service_tree *child_st = child_sd->service_tree +
- ioprio_class;
- struct rb_root *child_active = &child_st->active;
-
- child_entity = bfq_entity_of(rb_first(child_active));
-
- if (!child_entity)
- child_entity = child_sd->in_service_entity;
- }
-
- bfqq = bfq_entity_to_bfqq(child_entity);
- bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
-}
-
-/**
- * bfq_reparent_active_queues - move to the root group all active queues.
- * @bfqd: the device data structure with the root group.
- * @bfqg: the group to move from.
- * @st: the service tree to start the search from.
- */
-static void bfq_reparent_active_queues(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st,
- int ioprio_class)
-{
- struct rb_root *active = &st->active;
- struct bfq_entity *entity;
-
- while ((entity = bfq_entity_of(rb_first(active))))
- bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
+ struct hlist_node *next;
- if (bfqg->sched_data.in_service_entity)
- bfq_reparent_leaf_entity(bfqd,
- bfqg->sched_data.in_service_entity,
- ioprio_class);
+ hlist_for_each_entry_safe(bfqq, next, &bfqg->children, children_node)
+ bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
/**
@@ -897,38 +844,17 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
goto put_async_queues;
/*
- * Empty all service_trees belonging to this group before
- * deactivating the group itself.
+ * Reparent all bfqqs under this bfq group. This will also empty all
+ * service_trees belonging to this group before deactivating the group
+ * itself.
*/
+ bfq_reparent_children(bfqd, bfqg);
+
for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
st = bfqg->sched_data.service_tree + i;
- /*
- * It may happen that some queues are still active
- * (busy) upon group destruction (if the corresponding
- * processes have been forced to terminate). We move
- * all the leaf entities corresponding to these queues
- * to the root_group.
- * Also, it may happen that the group has an entity
- * in service, which is disconnected from the active
- * tree: it must be moved, too.
- * There is no need to put the sync queues, as the
- * scheduler has taken no reference.
- */
- bfq_reparent_active_queues(bfqd, bfqg, st, i);
-
- /*
- * The idle tree may still contain bfq_queues
- * belonging to exited task because they never
- * migrated to a different cgroup from the one being
- * destroyed now. In addition, even
- * bfq_reparent_active_queues() may happen to add some
- * entities to the idle tree. It happens if, in some
- * of the calls to bfq_bfqq_move() performed by
- * bfq_reparent_active_queues(), the queue to move is
- * empty and gets expired.
- */
- bfq_flush_idle_tree(st);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&st->active));
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&st->idle));
}
__bfq_deactivate_entity(entity, false);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fec18118dc30..8f33f6b91d05 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5163,6 +5163,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq)
bfqq->bfqd->last_completed_rq_bfqq = NULL;
+ hlist_del(&bfqq->children_node);
kmem_cache_free(bfq_pool, bfqq);
bfqg_and_blkg_put(bfqg);
}
@@ -5337,8 +5338,9 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bfq_set_next_ioprio_data(bfqq, bic);
}
-static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
- struct bfq_io_cq *bic, pid_t pid, int is_sync)
+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_group *bfqg,
+ struct bfq_queue *bfqq, struct bfq_io_cq *bic,
+ pid_t pid, int is_sync)
{
u64 now_ns = ktime_get_ns();
@@ -5347,6 +5349,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
INIT_HLIST_NODE(&bfqq->burst_list_node);
INIT_HLIST_NODE(&bfqq->woken_list_node);
INIT_HLIST_HEAD(&bfqq->woken_list);
+ hlist_add_head(&bfqq->children_node, &bfqg->children);
bfqq->ref = 0;
bfqq->bfqd = bfqd;
@@ -5600,8 +5603,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
bfqd->queue->node);
if (bfqq) {
- bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
- is_sync);
+ bfq_init_bfqq(bfqd, bfqg, bfqq, bic, current->pid, is_sync);
bfq_init_entity(&bfqq->entity, bfqg);
bfq_log_bfqq(bfqd, bfqq, "allocated");
} else {
@@ -6908,6 +6910,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
hrtimer_cancel(&bfqd->idle_slice_timer);
+ hlist_del(&bfqd->oom_bfqq.children_node);
/* release oom-queue reference to root group */
bfqg_and_blkg_put(bfqd->root_group);
@@ -6959,28 +6962,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
q->elevator = eq;
spin_unlock_irq(&q->queue_lock);
- /*
- * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
- * Grab a permanent reference to it, so that the normal code flow
- * will not attempt to free it.
- */
- bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
- bfqd->oom_bfqq.ref++;
- bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
- bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
- bfqd->oom_bfqq.entity.new_weight =
- bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
-
- /* oom_bfqq does not participate to bursts */
- bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
-
- /*
- * Trigger weight initialization, according to ioprio, at the
- * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
- * class won't be changed any more.
- */
- bfqd->oom_bfqq.entity.prio_changed = 1;
-
bfqd->queue = q;
INIT_LIST_HEAD(&bfqd->dispatch);
@@ -7059,6 +7040,27 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
goto out_free;
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+ /*
+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+ * Grab a permanent reference to it, so that the normal code flow
+ * will not attempt to free it.
+ */
+ bfq_init_bfqq(bfqd, bfqd->root_group, &bfqd->oom_bfqq, NULL, 1, 0);
+ bfqd->oom_bfqq.ref++;
+ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+ bfqd->oom_bfqq.entity.new_weight =
+ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+ /* oom_bfqq does not participate to bursts */
+ bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+
+ /*
+ * Trigger weight initialization, according to ioprio, at the
+ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+ * class won't be changed any more.
+ */
+ bfqd->oom_bfqq.entity.prio_changed = 1;
wbt_disable_default(q);
return 0;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index a73488eec8a4..a1984959d6be 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -292,6 +292,9 @@ struct bfq_queue {
/* node for active/idle bfqq list inside parent bfqd */
struct list_head bfqq_list;
+ /* Member of parent's bfqg children list */
+ struct hlist_node children_node;
+
/* associated @bfq_ttime struct */
struct bfq_ttime ttime;
@@ -929,6 +932,9 @@ struct bfq_group {
struct bfq_entity entity;
struct bfq_sched_data sched_data;
+ /* bfq_queues under this entity */
+ struct hlist_head children;
+
void *bfqd;
struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
--
2.26.2