The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ac2081cdc4d99c57f219c1a6171526e0fa0a6fff Mon Sep 17 00:00:00 2001
From: Will Deacon <will(a)kernel.org>
Date: Thu, 2 Jul 2020 21:16:20 +0100
Subject: [PATCH] arm64: ptrace: Consistently use pseudo-singlestep exceptions
Although the arm64 single-step state machine can be fast-forwarded in
cases where we wish to generate a SIGTRAP without actually executing an
instruction, this has two major limitations outside of simply skipping
an instruction due to emulation.
1. Stepping out of a ptrace signal stop into a signal handler where
SIGTRAP is blocked. Fast-forwarding the stepping state machine in
this case will result in a forced SIGTRAP, with the handler reset to
SIG_DFL.
2. The hardware implicitly fast-forwards the state machine when executing
an SVC instruction for issuing a system call. This can interact badly
with subsequent ptrace stops signalled during the execution of the
system call (e.g. SYSCALL_EXIT or seccomp traps), as they may corrupt
the stepping state by updating the PSTATE for the tracee.
Resolve both of these issues by injecting a pseudo-singlestep exception
on entry to a signal handler and also on return to userspace following a
system call.
Cc: <stable(a)vger.kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Tested-by: Luis Machado <luis.machado(a)linaro.org>
Reported-by: Keno Fischer <keno(a)juliacomputing.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 6ea8b6a26ae9..5e784e16ee89 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -93,6 +93,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SVE (1 << TIF_SVE)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 68b7f34a08f5..057d4aa1af4d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1818,12 +1818,23 @@ static void tracehook_report_syscall(struct pt_regs *regs,
saved_reg = regs->regs[regno];
regs->regs[regno] = dir;
- if (dir == PTRACE_SYSCALL_EXIT)
+ if (dir == PTRACE_SYSCALL_ENTER) {
+ if (tracehook_report_syscall_entry(regs))
+ forget_syscall(regs);
+ regs->regs[regno] = saved_reg;
+ } else if (!test_thread_flag(TIF_SINGLESTEP)) {
tracehook_report_syscall_exit(regs, 0);
- else if (tracehook_report_syscall_entry(regs))
- forget_syscall(regs);
+ regs->regs[regno] = saved_reg;
+ } else {
+ regs->regs[regno] = saved_reg;
- regs->regs[regno] = saved_reg;
+ /*
+ * Signal a pseudo-step exception since we are stepping but
+ * tracer modifications to the registers may have rewound the
+ * state machine.
+ */
+ tracehook_report_syscall_exit(regs, 1);
+ }
}
int syscall_trace_enter(struct pt_regs *regs)
@@ -1851,12 +1862,14 @@ int syscall_trace_enter(struct pt_regs *regs)
void syscall_trace_exit(struct pt_regs *regs)
{
+ unsigned long flags = READ_ONCE(current_thread_info()->flags);
+
audit_syscall_exit(regs);
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ if (flags & _TIF_SYSCALL_TRACEPOINT)
trace_sys_exit(regs, regs_return_value(regs));
- if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
rseq_syscall(regs);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 801d56cdf701..3b4f31f35e45 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -800,7 +800,6 @@ static void setup_restart_syscall(struct pt_regs *regs)
*/
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- struct task_struct *tsk = current;
sigset_t *oldset = sigmask_to_save();
int usig = ksig->sig;
int ret;
@@ -824,14 +823,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
*/
ret |= !valid_user_regs(®s->user_regs, current);
- /*
- * Fast forward the stepping logic so we step into the signal
- * handler.
- */
- if (!ret)
- user_fastforward_single_step(tsk);
-
- signal_setup_done(ret, ksig, 0);
+ /* Step into the signal handler if we are stepping */
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP));
}
/*
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 5f5b868292f5..7c14466a12af 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -139,7 +139,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
local_daif_mask();
flags = current_thread_info()->flags;
- if (!has_syscall_work(flags)) {
+ if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
/*
* We're off to userspace, where interrupts are
* always enabled after we restore the flags from
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 263a0269a59c0b4145829462a107fe7f7327105f Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen(a)kernel.org>
Date: Mon, 29 Jun 2020 11:25:43 -0500
Subject: [PATCH] arm64: dts: stratix10: add status to qspi dts node
Add status = "okay" to QSPI node.
Fixes: 0cb140d07fc75 ("arm64: dts: stratix10: Add QSPI support for Stratix10")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.6
Signed-off-by: Dinh Nguyen <dinguyen(a)kernel.org>
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index f6c4a15079d3..feadd21bc0dc 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -155,6 +155,7 @@
};
&qspi {
+ status = "okay";
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
index 9946515b8afd..4000c393243d 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
@@ -188,6 +188,7 @@
};
&qspi {
+ status = "okay";
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 263a0269a59c0b4145829462a107fe7f7327105f Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen(a)kernel.org>
Date: Mon, 29 Jun 2020 11:25:43 -0500
Subject: [PATCH] arm64: dts: stratix10: add status to qspi dts node
Add status = "okay" to QSPI node.
Fixes: 0cb140d07fc75 ("arm64: dts: stratix10: Add QSPI support for Stratix10")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.6
Signed-off-by: Dinh Nguyen <dinguyen(a)kernel.org>
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index f6c4a15079d3..feadd21bc0dc 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -155,6 +155,7 @@
};
&qspi {
+ status = "okay";
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
index 9946515b8afd..4000c393243d 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
@@ -188,6 +188,7 @@
};
&qspi {
+ status = "okay";
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4237c625304b212a3f30adf787901082082511ec Mon Sep 17 00:00:00 2001
From: Tim Harvey <tharvey(a)gateworks.com>
Date: Tue, 23 Jun 2020 12:06:54 -0700
Subject: [PATCH] ARM: dts: imx6qdl-gw551x: fix audio SSI
The audio codec on the GW551x routes to ssi1. It fixes audio capture on
the device.
Cc: stable(a)vger.kernel.org
Fixes: 3117e851cef1 ("ARM: dts: imx: Add TDA19971 HDMI Receiver to GW551x")
Signed-off-by: Tim Harvey <tharvey(a)gateworks.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
index c38e86eedcc0..8c33510c9519 100644
--- a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
@@ -110,7 +110,7 @@
simple-audio-card,frame-master = <&sound_codec>;
sound_cpu: simple-audio-card,cpu {
- sound-dai = <&ssi2>;
+ sound-dai = <&ssi1>;
};
sound_codec: simple-audio-card,codec {
From: Finley Xiao <finley.xiao(a)rock-chips.com>
commit 371a3bc79c11b707d7a1b7a2c938dc3cc042fffb upstream.
The function cpu_power_to_freq is used to find a frequency and set the
cooling device to consume at most the power to be converted. For example,
if the power to be converted is 80mW, and the em table is as follow.
struct em_cap_state table[] = {
/* KHz mW */
{ 1008000, 36, 0 },
{ 1200000, 49, 0 },
{ 1296000, 59, 0 },
{ 1416000, 72, 0 },
{ 1512000, 86, 0 },
};
The target frequency should be 1416000KHz, not 1512000KHz.
Fixes: 349d39dc5739 ("thermal: cpu_cooling: merge frequency and power tables")
Cc: <stable(a)vger.kernel.org> # v4.13+
Signed-off-by: Finley Xiao <finley.xiao(a)rock-chips.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
Reviewed-by: Amit Kucheria <amit.kucheria(a)linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Link: https://lore.kernel.org/r/20200619090825.32747-1-finley.xiao@rock-chips.com
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
Hi Greg,
I am resending this as I got your emails of this failing on 4.14, 4.19
and 5.4. This should be applied to all three of them.
@Finley: I hope I have done it correctly, please do check it as this
required me to rewrite the code to adapt to previous kernels.
drivers/thermal/cpu_cooling.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 908a8014cf76..1f4387a5ceae 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -280,11 +280,11 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
int i;
struct freq_table *freq_table = cpufreq_cdev->freq_table;
- for (i = 1; i <= cpufreq_cdev->max_level; i++)
- if (power > freq_table[i].power)
+ for (i = 0; i < cpufreq_cdev->max_level; i++)
+ if (power >= freq_table[i].power)
break;
- return freq_table[i - 1].frequency;
+ return freq_table[i].frequency;
}
/**
--
2.25.0.rc1.19.g042ed3e048af
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6348dd291e3653534a9e28e6917569bc9967b35b Mon Sep 17 00:00:00 2001
From: Charan Teja Kalla <charante(a)codeaurora.org>
Date: Fri, 19 Jun 2020 17:27:19 +0530
Subject: [PATCH] dmabuf: use spinlock to access dmabuf->name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
There exists a sleep-while-atomic bug while accessing the dmabuf->name
under mutex in the dmabuffs_dname(). This is caused from the SELinux
permissions checks on a process where it tries to validate the inherited
files from fork() by traversing them through iterate_fd() (which
traverse files under spin_lock) and call
match_file(security/selinux/hooks.c) where the permission checks happen.
This audit information is logged using dump_common_audit_data() where it
calls d_path() to get the file path name. If the file check happen on
the dmabuf's fd, then it ends up in ->dmabuffs_dname() and use mutex to
access dmabuf->name. The flow will be like below:
flush_unauthorized_files()
iterate_fd()
spin_lock() --> Start of the atomic section.
match_file()
file_has_perm()
avc_has_perm()
avc_audit()
slow_avc_audit()
common_lsm_audit()
dump_common_audit_data()
audit_log_d_path()
d_path()
dmabuffs_dname()
mutex_lock()--> Sleep while atomic.
Call trace captured (on 4.19 kernels) is below:
___might_sleep+0x204/0x208
__might_sleep+0x50/0x88
__mutex_lock_common+0x5c/0x1068
__mutex_lock_common+0x5c/0x1068
mutex_lock_nested+0x40/0x50
dmabuffs_dname+0xa0/0x170
d_path+0x84/0x290
audit_log_d_path+0x74/0x130
common_lsm_audit+0x334/0x6e8
slow_avc_audit+0xb8/0xf8
avc_has_perm+0x154/0x218
file_has_perm+0x70/0x180
match_file+0x60/0x78
iterate_fd+0x128/0x168
selinux_bprm_committing_creds+0x178/0x248
security_bprm_committing_creds+0x30/0x48
install_exec_creds+0x1c/0x68
load_elf_binary+0x3a4/0x14e0
search_binary_handler+0xb0/0x1e0
So, use spinlock to access dmabuf->name to avoid sleep-while-atomic.
Cc: <stable(a)vger.kernel.org> [5.3+]
Signed-off-by: Charan Teja Kalla <charante(a)codeaurora.org>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
Acked-by: Christian König <christian.koenig(a)amd.com>
[sumits: added comment to spinlock_t definition to avoid warning]
Signed-off-by: Sumit Semwal <sumit.semwal(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/a83e7f0d-4e54-9848-4b58-e1acd…
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 412629601ad3..1ca609f66fdf 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -45,10 +45,10 @@ static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
size_t ret = 0;
dmabuf = dentry->d_fsdata;
- dma_resv_lock(dmabuf->resv, NULL);
+ spin_lock(&dmabuf->name_lock);
if (dmabuf->name)
ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
- dma_resv_unlock(dmabuf->resv);
+ spin_unlock(&dmabuf->name_lock);
return dynamic_dname(dentry, buffer, buflen, "/%s:%s",
dentry->d_name.name, ret > 0 ? name : "");
@@ -338,8 +338,10 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
kfree(name);
goto out_unlock;
}
+ spin_lock(&dmabuf->name_lock);
kfree(dmabuf->name);
dmabuf->name = name;
+ spin_unlock(&dmabuf->name_lock);
out_unlock:
dma_resv_unlock(dmabuf->resv);
@@ -402,10 +404,10 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
/* Don't count the temporary reference taken inside procfs seq_show */
seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1);
seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name);
- dma_resv_lock(dmabuf->resv, NULL);
+ spin_lock(&dmabuf->name_lock);
if (dmabuf->name)
seq_printf(m, "name:\t%s\n", dmabuf->name);
- dma_resv_unlock(dmabuf->resv);
+ spin_unlock(&dmabuf->name_lock);
}
static const struct file_operations dma_buf_fops = {
@@ -542,6 +544,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
dmabuf->size = exp_info->size;
dmabuf->exp_name = exp_info->exp_name;
dmabuf->owner = exp_info->owner;
+ spin_lock_init(&dmabuf->name_lock);
init_waitqueue_head(&dmabuf->poll);
dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index ab0c156abee6..a2ca294eaebe 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -311,6 +311,7 @@ struct dma_buf {
void *vmap_ptr;
const char *exp_name;
const char *name;
+ spinlock_t name_lock; /* spinlock to protect name access */
struct module *owner;
struct list_head list_node;
void *priv;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a683f390b93f4d1292f849fc48d28e322046120f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Mon, 4 Jul 2016 09:50:36 +0000
Subject: [PATCH] timers: Forward the wheel clock whenever possible
The wheel clock is stale when a CPU goes into a long idle sleep. This has the
side effect that timers which are queued end up in the outer wheel levels.
That results in coarser granularity.
To solve this, we keep track of the idle state and forward the wheel clock
whenever possible.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Arjan van de Ven <arjan(a)infradead.org>
Cc: Chris Mason <clm(a)fb.com>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Frederic Weisbecker <fweisbec(a)gmail.com>
Cc: George Spelvin <linux(a)sciencehorizons.net>
Cc: Josh Triplett <josh(a)joshtriplett.org>
Cc: Len Brown <lenb(a)kernel.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Paul E. McKenney <paulmck(a)linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: rt(a)linutronix.de
Link: http://lkml.kernel.org/r/20160704094342.512039360@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 966a5a6fdd0a..f738251000fe 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -164,3 +164,4 @@ static inline void timers_update_migration(bool update_nohz) { }
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+void timer_clear_idle(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 69abc7bfe80f..5d81f9aa30d2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -700,6 +700,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
tick.tv64 = 0;
+
+ /*
+ * Tell the timer code that the base is not idle, i.e. undo
+ * the effect of get_next_timer_interrupt():
+ */
+ timer_clear_idle();
/*
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
@@ -809,6 +815,12 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
tick_do_update_jiffies64(now);
cpu_load_update_nohz_stop();
+ /*
+ * Clear the timer idle flag, so we avoid IPIs on remote queueing and
+ * the clock forward checks in the enqueue path:
+ */
+ timer_clear_idle();
+
calc_load_exit_idle();
touch_softlockup_watchdog_sched();
/*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 658051c97a3c..9339d71ee998 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,9 +196,11 @@ struct timer_base {
spinlock_t lock;
struct timer_list *running_timer;
unsigned long clk;
+ unsigned long next_expiry;
unsigned int cpu;
bool migration_enabled;
bool nohz_active;
+ bool is_idle;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -519,24 +521,37 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
{
__internal_add_timer(base, timer);
+ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+ return;
+
/*
- * Check whether the other CPU is in dynticks mode and needs
- * to be triggered to reevaluate the timer wheel. We are
- * protected against the other CPU fiddling with the timer by
- * holding the timer base lock. This also makes sure that a
- * CPU on the way to stop its tick can not evaluate the timer
- * wheel.
- *
- * Spare the IPI for deferrable timers on idle targets though.
- * The next busy ticks will take care of it. Except full dynticks
- * require special care against races with idle_cpu(), lets deal
- * with that later.
+ * TODO: This wants some optimizing similar to the code below, but we
+ * will do that when we switch from push to pull for deferrable timers.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) {
- if (!(timer->flags & TIMER_DEFERRABLE) ||
- tick_nohz_full_cpu(base->cpu))
+ if (timer->flags & TIMER_DEFERRABLE) {
+ if (tick_nohz_full_cpu(base->cpu))
wake_up_nohz_cpu(base->cpu);
+ return;
}
+
+ /*
+ * We might have to IPI the remote CPU if the base is idle and the
+ * timer is not deferrable. If the other CPU is on the way to idle
+ * then it can't set base->is_idle as we hold the base lock:
+ */
+ if (!base->is_idle)
+ return;
+
+ /* Check whether this is the new first expiring timer: */
+ if (time_after_eq(timer->expires, base->next_expiry))
+ return;
+
+ /*
+ * Set the next expiry time and kick the CPU so it can reevaluate the
+ * wheel:
+ */
+ base->next_expiry = timer->expires;
+ wake_up_nohz_cpu(base->cpu);
}
#ifdef CONFIG_TIMER_STATS
@@ -844,10 +859,11 @@ static inline struct timer_base *get_timer_base(u32 tflags)
return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}
-static inline struct timer_base *get_target_base(struct timer_base *base,
- unsigned tflags)
+#ifdef CONFIG_NO_HZ_COMMON
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
{
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
if ((tflags & TIMER_PINNED) || !base->migration_enabled)
return get_timer_this_cpu_base(tflags);
return get_timer_cpu_base(tflags, get_nohz_timer_target());
@@ -856,6 +872,43 @@ static inline struct timer_base *get_target_base(struct timer_base *base,
#endif
}
+static inline void forward_timer_base(struct timer_base *base)
+{
+ /*
+ * We only forward the base when it's idle and we have a delta between
+ * base clock and jiffies.
+ */
+ if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+ return;
+
+ /*
+ * If the next expiry value is > jiffies, then we fast forward to
+ * jiffies otherwise we forward to the next expiry value.
+ */
+ if (time_after(base->next_expiry, jiffies))
+ base->clk = jiffies;
+ else
+ base->clk = base->next_expiry;
+}
+#else
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
+{
+ return get_timer_this_cpu_base(tflags);
+}
+
+static inline void forward_timer_base(struct timer_base *base) { }
+#endif
+
+static inline struct timer_base *
+get_target_base(struct timer_base *base, unsigned tflags)
+{
+ struct timer_base *target = __get_target_base(base, tflags);
+
+ forward_timer_base(target);
+ return target;
+}
+
/*
* We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
* that all timers which are tied to this base are locked, and the base itself
@@ -1417,16 +1470,49 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
spin_lock(&base->lock);
nextevt = __next_timer_interrupt(base);
- spin_unlock(&base->lock);
+ base->next_expiry = nextevt;
+ /*
+ * We have a fresh next event. Check whether we can forward the base:
+ */
+ if (time_after(nextevt, jiffies))
+ base->clk = jiffies;
+ else if (time_after(nextevt, base->clk))
+ base->clk = nextevt;
- if (time_before_eq(nextevt, basej))
+ if (time_before_eq(nextevt, basej)) {
expires = basem;
- else
+ base->is_idle = false;
+ } else {
expires = basem + (nextevt - basej) * TICK_NSEC;
+ /*
+ * If we expect to sleep more than a tick, mark the base idle:
+ */
+ if ((expires - basem) > TICK_NSEC)
+ base->is_idle = true;
+ }
+ spin_unlock(&base->lock);
return cmp_next_hrtimer_event(basem, expires);
}
+/**
+ * timer_clear_idle - Clear the idle state of the timer base
+ *
+ * Called with interrupts disabled
+ */
+void timer_clear_idle(void)
+{
+ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
+ /*
+ * We do this unlocked. The worst outcome is a remote enqueue sending
+ * a pointless IPI, but taking the lock would just make the window for
+ * sending the IPI a few instructions smaller for the cost of taking
+ * the lock in the exit from idle path.
+ */
+ base->is_idle = false;
+}
+
static int collect_expired_timers(struct timer_base *base,
struct hlist_head *heads)
{
@@ -1440,7 +1526,7 @@ static int collect_expired_timers(struct timer_base *base,
/*
* If the next timer is ahead of time forward to current
- * jiffies, otherwise forward to the next expiry time.
+ * jiffies, otherwise forward to the next expiry time:
*/
if (time_after(next, jiffies)) {
/* The call site will increment clock! */