From: Nicolas Saenz Julienne nsaenzju@redhat.com
[ Upstream commit 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a ]
dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having called sched_update_tick_dependency() preventing it from re-enabling the tick on systems that no longer have pending SCHED_RT tasks but have multiple runnable SCHED_OTHER tasks:
dequeue_task_rt() dequeue_rt_entity() dequeue_rt_stack() dequeue_top_rt_rq() sub_nr_running() // decrements rq->nr_running sched_update_tick_dependency() sched_can_stop_tick() // checks rq->rt.rt_nr_running, ... __dequeue_rt_entity() dec_rt_tasks() // decrements rq->rt.rt_nr_running ...
Every other scheduler class performs the operation in the opposite order, and sched_update_tick_dependency() expects the values to be updated as such. So avoid the misbehaviour by inverting the order in which the above operations are performed in the RT scheduler.
Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model") Signed-off-by: Nicolas Saenz Julienne nsaenzju@redhat.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Valentin Schneider vschneid@redhat.com Reviewed-by: Phil Auld pauld@redhat.com Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/sched/rt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7891c0f0e1ff..907a5d7507a8 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -430,7 +430,7 @@ static inline void rt_queue_push_tasks(struct rq *rq) #endif /* CONFIG_SMP */
static void enqueue_top_rt_rq(struct rt_rq *rt_rq); -static void dequeue_top_rt_rq(struct rt_rq *rt_rq); +static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
static inline int on_rt_rq(struct sched_rt_entity *rt_se) { @@ -551,7 +551,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu];
if (!rt_se) { - dequeue_top_rt_rq(rt_rq); + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); } @@ -637,7 +637,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { - dequeue_top_rt_rq(rt_rq); + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); }
static inline int rt_rq_throttled(struct rt_rq *rt_rq) @@ -1039,7 +1039,7 @@ static void update_curr_rt(struct rq *rq) }
static void -dequeue_top_rt_rq(struct rt_rq *rt_rq) +dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count) { struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -1050,7 +1050,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
BUG_ON(!rq->nr_running);
- sub_nr_running(rq, rt_rq->rt_nr_running); + sub_nr_running(rq, count); rt_rq->rt_queued = 0;
} @@ -1436,18 +1436,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) { struct sched_rt_entity *back = NULL; + unsigned int rt_nr_running;
for_each_sched_rt_entity(rt_se) { rt_se->back = back; back = rt_se; }
- dequeue_top_rt_rq(rt_rq_of_se(back)); + rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se)) __dequeue_rt_entity(rt_se, flags); } + + dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running); }
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)