From: Paul E. McKenney <paulmck(a)kernel.org>
commit bc31e6cb27a9334140ff2f0a209d59b08bc0bc8c upstream.
Holding a mutex across synchronize_rcu_tasks() and acquiring
that same mutex in code called from do_exit() after its call to
exit_tasks_rcu_start() but before its call to exit_tasks_rcu_stop()
results in deadlock. This is by design, because tasks that are far
enough into do_exit() are no longer present on the tasks list, making
it a bit difficult for RCU Tasks to find them, let alone wait on them
to do a voluntary context switch. However, such deadlocks are becoming
more frequent. In addition, lockdep currently does not detect such
deadlocks and they can be difficult to reproduce.
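For illustration only, the deadlock pattern looks roughly like the
sketch below (the mutex and both functions are hypothetical, invented
for this example rather than taken from any particular report):

static DEFINE_MUTEX(my_mutex);			/* hypothetical */

/* Updater side: some subsystem's unregister/cleanup path. */
static void my_updater(void)
{
	mutex_lock(&my_mutex);
	/* Must wait for every task, including the one below. */
	synchronize_rcu_tasks();
	mutex_unlock(&my_mutex);
}

/* Exit side: called from do_exit() after exit_tasks_rcu_start()
 * but before exit_tasks_rcu_stop(). */
static void my_exit_hook(void)
{
	/* Blocks behind my_updater(), which in turn cannot complete
	 * synchronize_rcu_tasks() until this task reaches
	 * exit_tasks_rcu_stop(): deadlock. */
	mutex_lock(&my_mutex);
	mutex_unlock(&my_mutex);
}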
In addition, if a task voluntarily context switches during that time
(for example, if it blocks acquiring a mutex), then this task is in an
RCU Tasks quiescent state. And with some adjustments, RCU Tasks could
just as well take advantage of that fact.
This commit therefore eliminates these deadlocks by replacing the
SRCU-based wait for do_exit() completion with per-CPU lists of tasks
currently exiting. A given task will be on one of these per-CPU lists
for the same period of time that it would previously have spent in the
corresponding SRCU read-side critical section. These lists enable RCU
Tasks to find the tasks that have already been removed from the tasks
list, but that must nevertheless be waited upon.
The RCU Tasks grace period gathers any of these do_exit() tasks that it
must wait on, and adds them to the list of holdouts. Per-CPU locking
and get_task_struct() are used to synchronize addition to and removal
from these lists.
Link: https://lore.kernel.org/all/20240118021842.290665-1-chenzhongjin@huawei.com/
Reported-by: Chen Zhongjin <chenzhongjin(a)huawei.com>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Zqiang <qiang.zhang1211(a)gmail.com>
---
include/linux/sched.h | 1 +
init/init_task.c | 1 +
kernel/fork.c | 1 +
kernel/rcu/update.c | 65 ++++++++++++++++++++++++++++++-------------
4 files changed, 49 insertions(+), 19 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fd4899236037..0b555d8e9d5e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -679,6 +679,7 @@ struct task_struct {
u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list;
+ struct list_head rcu_tasks_exit_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
struct sched_info sched_info;
diff --git a/init/init_task.c b/init/init_task.c
index 994ffe018120..f741cbfd891c 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -139,6 +139,7 @@ struct task_struct init_task
.rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
+ .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif
#ifdef CONFIG_CPUSETS
.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
diff --git a/kernel/fork.c b/kernel/fork.c
index b65871600507..d416d16df62f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1626,6 +1626,7 @@ static inline void rcu_copy_process(struct task_struct *p)
p->rcu_tasks_holdout = false;
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1;
+ INIT_LIST_HEAD(&p->rcu_tasks_exit_list);
#endif /* #ifdef CONFIG_TASKS_RCU */
}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 81688a133552..5227cb5c1bea 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -527,7 +527,8 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
/* Track exiting tasks in order to allow them to be waited for. */
-DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
+static LIST_HEAD(rtp_exit_list);
+static DEFINE_RAW_SPINLOCK(rtp_exit_list_lock);
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
@@ -661,6 +662,17 @@ static void check_holdout_task(struct task_struct *t,
sched_show_task(t);
}
+static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
+{
+ if (t != current && READ_ONCE(t->on_rq) &&
+ !is_idle_task(t)) {
+ get_task_struct(t);
+ t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
+ WRITE_ONCE(t->rcu_tasks_holdout, true);
+ list_add(&t->rcu_tasks_holdout_list, hop);
+ }
+}
+
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
@@ -726,14 +738,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
*/
rcu_read_lock();
for_each_process_thread(g, t) {
- if (t != current && READ_ONCE(t->on_rq) &&
- !is_idle_task(t)) {
- get_task_struct(t);
- t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
- WRITE_ONCE(t->rcu_tasks_holdout, true);
- list_add(&t->rcu_tasks_holdout_list,
- &rcu_tasks_holdouts);
- }
+ rcu_tasks_pertask(t, &rcu_tasks_holdouts);
}
rcu_read_unlock();
@@ -744,8 +749,12 @@ static int __noreturn rcu_tasks_kthread(void *arg)
* where they have disabled preemption, allowing the
* later synchronize_sched() to finish the job.
*/
- synchronize_srcu(&tasks_rcu_exit_srcu);
-
+ raw_spin_lock_irqsave(&rtp_exit_list_lock, flags);
+ list_for_each_entry(t, &rtp_exit_list, rcu_tasks_exit_list) {
+ if (list_empty(&t->rcu_tasks_holdout_list))
+ rcu_tasks_pertask(t, &rcu_tasks_holdouts);
+ }
+ raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags);
/*
* Each pass through the following loop scans the list
* of holdout tasks, removing any that are no longer
@@ -802,8 +811,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
*
* In addition, this synchronize_sched() waits for exiting
* tasks to complete their final preempt_disable() region
- * of execution, cleaning up after the synchronize_srcu()
- * above.
+ * of execution.
*/
synchronize_sched();
@@ -834,20 +842,39 @@ static int __init rcu_spawn_tasks_kthread(void)
}
core_initcall(rcu_spawn_tasks_kthread);
-/* Do the srcu_read_lock() for the above synchronize_srcu(). */
+/*
+ * Protect against tasklist scan blind spot while the task is exiting and
+ * may be removed from the tasklist. Do this by adding the task to yet
+ * another list.
+ */
void exit_tasks_rcu_start(void)
{
+ unsigned long flags;
+ struct task_struct *t = current;
+
+ WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
+ get_task_struct(t);
preempt_disable();
- current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
+ raw_spin_lock_irqsave(&rtp_exit_list_lock, flags);
+ list_add(&t->rcu_tasks_exit_list, &rtp_exit_list);
+ raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags);
preempt_enable();
}
-/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
+/*
+ * Remove the task from the "yet another list" because do_exit() is now
+ * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
+ */
void exit_tasks_rcu_finish(void)
{
- preempt_disable();
- __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
- preempt_enable();
+ unsigned long flags;
+ struct task_struct *t = current;
+
+ WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
+ raw_spin_lock_irqsave(&rtp_exit_list_lock, flags);
+ list_del_init(&t->rcu_tasks_exit_list);
+ raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags);
+ put_task_struct(t);
}
#endif /* #ifdef CONFIG_TASKS_RCU */
--
2.17.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d877550eaf2dc9090d782864c96939397a3c6835
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021941-reprimand-grudge-7734@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d877550eaf2d ("x86/fpu: Stop relying on userspace for info to fault in xsave buffer")
c03098d4b9ad ("Merge tag 'gfs2-v5.15-rc5-mmap-fault' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d877550eaf2dc9090d782864c96939397a3c6835 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin(a)google.com>
Date: Mon, 29 Jan 2024 22:36:03 -0800
Subject: [PATCH] x86/fpu: Stop relying on userspace for info to fault in xsave
buffer
Before this change, the expected size of the user space buffer was
taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed
from user-space, so it is possible to construct a sigreturn frame where:
* fx_sw->xstate_size is smaller than the size required by the valid bits
in fx_sw->xfeatures.
* user-space unmaps parts of the sigframe fpu buffer so that not all of
the buffer required by xrstor is accessible.
In this case, xrstor tries to restore and accesses the unmapped area,
which results in a fault. But fault_in_readable succeeds because buf +
fx_sw->xstate_size is within the still-mapped area, so it goes back and
tries xrstor again. It will spin in this loop forever.
Instead, fault in the maximum size which can be touched by XRSTOR (taken
from fpstate->user_size).
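In sketch form (simplified control flow, with an illustrative helper
name rather than the real kernel symbols), the pre-change retry logic
behaves like this:

retry:
	ret = try_xrstor_from_user(buf);	/* illustrative name */
	if (ret == X86_TRAP_PF &&
	    !fault_in_readable(buf, fx_sw->xstate_size))
		goto retry;
	/*
	 * fault_in_readable() only guarantees that the range
	 * [buf, buf + xstate_size) is readable.  When XRSTOR needs bytes
	 * beyond that user-supplied size and those bytes are unmapped,
	 * the #PF recurs and the loop never terminates.  Faulting in
	 * fpstate->user_size instead covers everything XRSTOR can touch,
	 * so either the retry succeeds or fault_in_readable() fails and
	 * the restore is treated as fatal.
	 */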
[ dhansen: tweak subject / changelog ]
Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path")
Reported-by: Konstantin Bogomolov <bogomolov(a)google.com>
Suggested-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrei Vagin <avagin(a)google.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbde5b..247f2225aa9f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
if (ret != X86_TRAP_PF)
return false;
- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;
if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;
fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}
/*
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012650-altitude-gush-572f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
3c12466b6b7b ("erofs: fix lz4 inplace decompression")
123ec246ebe3 ("erofs: get rid of the remaining kmap_atomic()")
ab749badf9f4 ("erofs: support unaligned data decompression")
10e5f6e482e1 ("erofs: introduce z_erofs_fixup_insize")
d67aee76d418 ("erofs: tidy up z_erofs_lz4_decompress")
7e508f2ca8bb ("erofs: rename lz4_0pading to zero_padding")
eaa9172ad988 ("erofs: get rid of ->lru usage")
622ceaddb764 ("erofs: lzma compression support")
966edfb0a3dc ("erofs: rename some generic methods in decompressor")
386292919c25 ("erofs: introduce readmore decompression strategy")
8f89926290c4 ("erofs: get compression algorithms directly on mapping")
dfeab2e95a75 ("erofs: add multiple device support")
e62424651f43 ("erofs: decouple basic mount options from fs_context")
5b6e7e120e71 ("erofs: remove the fast path of per-CPU buffer decompression")
2e5fd489a4e5 ("Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang(a)kernel.org>
Date: Wed, 6 Dec 2023 12:55:34 +0800
Subject: [PATCH] erofs: fix lz4 inplace decompression
Currently EROFS can map another compressed buffer for inplace
decompression; that is used to handle cases in which some pages of
compressed data are actually not in-place I/O.
However, like most simple LZ77 algorithms, LZ4 expects the compressed
data to be arranged at the end of the decompressed buffer, and it
explicitly uses memmove() to handle overlapping:
__________________________________________________________
|_ direction of decompression --> ____ |_ compressed data _|
Although EROFS arranges compressed data like this, it typically maps two
individual virtual buffers so the relative order is uncertain.
Previously, this was rarely observed, since LZ4 only uses memmove() for
short overlapped literals and the x86/arm64 memmove implementations seem
to cover it up completely, so they do not hit this issue. Juhyung reported
that EROFS data corruption can be found on a new Intel x86 processor.
After some analysis, it seems that recent x86 processors with the new
FSRM feature expose this issue with "rep movsb".
Let's strictly use the decompressed buffer for lz4 inplace
decompression for now. Later, as a useful improvement, we could try
to tie these two buffers together in the correct order.
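As a standalone illustration of the underlying hazard (plain userspace
C, not erofs code): a simple forward byte copy -- which is effectively
what "rep movsb" does -- is only safe on overlapping data when the
source sits at or beyond the destination, which is exactly the layout
that in-place decompression is supposed to guarantee:

#include <stdio.h>
#include <string.h>

/* Naive forward byte copy, analogous to a "rep movsb" memcpy. */
static void forward_copy(char *dst, const char *src, size_t n)
{
	for (size_t i = 0; i < n; i++)
		dst[i] = src[i];
}

int main(void)
{
	char a[9] = "abcdefgh";
	char b[9] = "abcdefgh";

	forward_copy(a + 2, a, 6);	/* source below destination: the
					 * copy re-reads bytes it has
					 * already written		  */
	memmove(b + 2, b, 6);		/* memmove picks a safe direction
					 * when it can see the overlap	  */
	printf("%s vs %s\n", a, b);	/* prints "abababab vs ababcdef"  */
	return 0;
}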
Reported-and-tested-by: Juhyung Park <qkrwngud825(a)gmail.com>
Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR…
Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace")
Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend")
Cc: stable <stable(a)vger.kernel.org> # 5.4+
Tested-by: Yifan Zhao <zhaoyifan(a)sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao(a)linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.…
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 021be5feb1bc..e0d609c3958f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -121,11 +121,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
}
static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
- void *inpage, unsigned int *inputmargin, int *maptype,
- bool may_inplace)
+ void *inpage, void *out, unsigned int *inputmargin,
+ int *maptype, bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
- unsigned int omargin, total, i, j;
+ unsigned int omargin, total, i;
struct page **in;
void *src, *tmp;
@@ -135,12 +135,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < ctx->inpages; ++i) {
- DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
- if (rq->out[j] == rq->in[i])
- goto docopy;
- }
+ for (i = 0; i < ctx->inpages; ++i)
+ if (rq->out[ctx->outpages - ctx->inpages + i] !=
+ rq->in[i])
+ goto docopy;
+ kunmap_local(inpage);
+ *maptype = 3;
+ return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
}
if (ctx->inpages <= 1) {
@@ -148,7 +149,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
return inpage;
}
kunmap_local(inpage);
- might_sleep();
src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
@@ -204,12 +204,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
- u8 *out)
+ u8 *dst)
{
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
- u8 *headpage, *src;
+ u8 *out, *headpage, *src;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
@@ -230,11 +230,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
- src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
+ out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
@@ -265,7 +266,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
- } else {
+ } else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
}
@@ -308,7 +309,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst);
if (!dst_maptype)
kunmap_local(dst);
else if (dst_maptype == 2)
From: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
Now that we are reading the full FIFO in the interrupt handler,
it is possible to find an empty FIFO since we are still receiving
1 interrupt per datum. Handle this case correctly instead of raising
an error that causes a reset of the FIFO.
Fixes: 0829edc43e0a ("iio: imu: inv_mpu6050: read the full fifo when processing data")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol(a)tdk.com>
---
V2: add missing stable tag
drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
index 66d4ba088e70..d4f9b5d8d28d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
@@ -109,6 +109,8 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
/* compute and process only all complete datum */
nb = fifo_count / bytes_per_datum;
fifo_count = nb * bytes_per_datum;
+ if (nb == 0)
+ goto end_session;
/* Each FIFO data contains all sensors, so same number for FIFO and sensor data */
fifo_period = NSEC_PER_SEC / INV_MPU6050_DIVIDER_TO_FIFO_RATE(st->chip_config.divider);
inv_sensors_timestamp_interrupt(&st->timestamp, fifo_period, nb, nb, pf->timestamp);
--
2.34.1
Resolving a frequency to an efficient one should not transgress policy->max
(which can be set for thermal reasons) or policy->min. Currently there is a
possibility that scaling_cur_freq can exceed scaling_max_freq when
scaling_max_freq is an inefficient frequency. Add an additional check to
ensure that resolving a frequency respects policy->min/max.
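For illustration, a standalone sketch (all table values and limits are
invented) of how an efficiency-aware lookup can resolve above
policy->max when the capped frequency is itself marked inefficient:

#include <stdio.h>

int main(void)
{
	/* Hypothetical frequency table in kHz; index 0 is inefficient. */
	unsigned int freq[]        = { 1200000, 1800000, 2400000 };
	int          inefficient[] = { 1,       0,       0       };
	unsigned int policy_max = 1200000;	/* thermal cap */
	unsigned int target     = 1200000;
	unsigned int resolved   = 0;

	/* RELATION_L-style lookup (lowest frequency >= target) that
	 * always skips inefficient entries, mimicking the behaviour
	 * before the added limit check. */
	for (int i = 0; i < 3; i++) {
		if (inefficient[i] || freq[i] < target)
			continue;
		resolved = freq[i];
		break;
	}

	/* Prints "resolved 1800000, policy->max 1200000": the resolved
	 * frequency exceeds the limit the new check now enforces. */
	printf("resolved %u, policy->max %u\n", resolved, policy_max);
	return 0;
}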
Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E")
Signed-off-by: Shivnandan Kumar <quic_kshivnan(a)quicinc.com>
---
include/linux/cpufreq.h | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..42d98b576a36 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1021,6 +1021,19 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_table_index_is_in_limits(struct cpufreq_policy *policy,
+ int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return (freq == clamp_val(freq, policy->min, policy->max));
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1054,7 +1067,10 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
return 0;
}
- if (idx < 0 && efficiencies) {
+ /*
+ * Limit frequency index to honor policy->min/max
+ */
+ if (!cpufreq_table_index_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
--
2.25.1
Resolving a frequency to an efficient one should not transgress policy->max
(which can be set for thermal reasons) or policy->min. Currently there is a
possibility that scaling_cur_freq can exceed scaling_max_freq when
scaling_max_freq is an inefficient frequency. Add an additional check to
ensure that resolving a frequency respects policy->min/max.
Cc: <stable(a)vger.kernel.org>
Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E")
Signed-off-by: Shivnandan Kumar <quic_kshivnan(a)quicinc.com>
--
Changes in v2:
-Rename function from cpufreq_table_index_is_in_limits to cpufreq_is_in_limits
-Remove redundant outer parentheses in the return statement
-Make the comment a single line
--
---
include/linux/cpufreq.h | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..7741244dee6e 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1021,6 +1021,19 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy,
+ int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return freq == clamp_val(freq, policy->min, policy->max);
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1054,7 +1067,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
return 0;
}
- if (idx < 0 && efficiencies) {
+ /* Limit frequency index to honor policy->min/max */
+ if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
--
2.25.1