The shadow call stack for irq, now stored in the current task's thread info, may be restored incorrectly, so backport call_on_irq_stack from mainline to fix it.
Ard Biesheuvel (1):
  arm64: Stash shadow stack pointer in the task struct on interrupt

Mark Rutland (3):
  arm64: entry: move arm64_preempt_schedule_irq to entry-common.c
  arm64: entry: add a call_on_irq_stack helper
  arm64: entry: convert IRQ+FIQ handlers to C

Xiang Yang (1):
  Revert "arm64: Stash shadow stack pointer in the task struct on interrupt"
 arch/arm64/include/asm/exception.h |  5 ++
 arch/arm64/kernel/entry-common.c   | 86 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/entry.S          | 84 ++++++++++++++--------------
 arch/arm64/kernel/process.c        | 17 ------
 4 files changed, 132 insertions(+), 60 deletions(-)
This reverts commit 3f225f29c69c13ce1cbdb1d607a42efeef080056.
The shadow call stack for irq is now stored in the current task's thread info by irq_stack_entry. There is a possibility that some softirqs are pending at the end of a hardirq, and when the softirq is processed with interrupts enabled, irq_stack_entry is entered again and overwrites the shadow call stack pointer that was stored in the current task's thread info. This leads to an incorrect shadow call stack being restored for the first entry of the hardirq, and the system ends up with a panic.
        task A                       |          task A
-------------------------------------+------------------------------------
el1_irq          //irq1 enter        |
irq_handler      //save scs_sp1      |
gic_handle_irq                       |
irq_exit                             |
__do_softirq                         |
                                     | el1_irq        //irq2 enter
                                     | irq_handler    //save scs_sp2
                                     |                //overwrite scs_sp1
                                     | ...
                                     | irq_stack_exit //restore scs_sp2
irq_stack_exit   //restore wrong     |
                 //scs_sp2           |
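For illustration only (not part of the patch): a minimal user-space C sketch
of the same failure mode, i.e. a single per-task save slot being clobbered by
a nested entry. All names and values here are made up and only stand in for
the real scs_sp register and per-task slot.

/*
 * NOT kernel code: user-space model of irq_stack_entry/irq_stack_exit
 * with a single per-task save slot for the shadow call stack pointer.
 */
#include <stdio.h>

static unsigned long task_scs_sp;       /* stand-in for the per-task slot */
static unsigned long scs_sp;            /* stand-in for the scs_sp register */

static void irq_stack_entry(unsigned long irq_scs)
{
        task_scs_sp = scs_sp;           /* save current shadow stack pointer */
        scs_sp = irq_scs;               /* switch to the IRQ shadow stack */
}

static void irq_stack_exit(void)
{
        scs_sp = task_scs_sp;           /* restore from the single slot */
}

int main(void)
{
        scs_sp = 0x1000;                /* task's shadow call stack pointer */

        irq_stack_entry(0x2000);        /* irq1: slot = 0x1000 */
        /* irq_exit() -> __do_softirq() runs with interrupts enabled ... */
        irq_stack_entry(0x2000);        /* irq2: slot overwritten with 0x2000 */
        irq_stack_exit();               /* irq2 exit: scs_sp = 0x2000 */
        irq_stack_exit();               /* irq1 exit: scs_sp = 0x2000, not 0x1000 */

        printf("restored scs_sp = %#lx, expected 0x1000\n", scs_sp);
        return 0;
}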
Fixes: 3f225f29c69c ("arm64: Stash shadow stack pointer in the task struct on interrupt")
Cc: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
---
 arch/arm64/kernel/entry.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a94acea770c7..020a455824be 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -431,7 +431,9 @@ SYM_CODE_END(__swpan_exit_el0)
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
-	scs_save tsk			// preserve the original shadow stack
+#ifdef CONFIG_SHADOW_CALL_STACK
+	mov	x24, scs_sp		// preserve the original shadow stack
+#endif
 
 	/*
 	 * Compare sp with the base of the task stack.
@@ -465,7 +467,9 @@ SYM_CODE_END(__swpan_exit_el0)
 	 */
 	.macro	irq_stack_exit
 	mov	sp, x19
-	scs_load_current
+#ifdef CONFIG_SHADOW_CALL_STACK
+	mov	scs_sp, x24
+#endif
 	.endm
/* GPRs used by entry code */
From: Mark Rutland <mark.rutland@arm.com>
commit 33a3581a76f3a36c7dcc9864120ce681bcfbcff1 upstream.
Subsequent patches will pull more of the IRQ entry handling into C. To keep this in one place, let's move arm64_preempt_schedule_irq() into entry-common.c along with the other entry management functions.
We no longer need to include <linux/lockdep.h> in process.c, so the include directive is removed.
There should be no functional change as a result of this patch.
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-5-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
---
 arch/arm64/kernel/entry-common.c | 20 ++++++++++++++++++++
 arch/arm64/kernel/process.c      | 17 -----------------
 2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 7a8cd856ee85..a533f4827253 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,7 +6,11 @@
  */
 
 #include <linux/context_tracking.h>
+#include <linux/linkage.h>
+#include <linux/lockdep.h>
 #include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
 #include <linux/thread_info.h>
 
 #include <asm/cpufeature.h>
@@ -109,6 +113,22 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 	exit_to_kernel_mode(regs);
 }
 
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Preempting a task from an IRQ means we leave copies of PSTATE
+	 * on the stack. cpufeature's enable calls may modify PSTATE, but
+	 * resuming one of these preempted tasks would undo those changes.
+	 *
+	 * Only allow a task to be preempted once cpufeatures have been
+	 * enabled.
+	 */
+	if (system_capabilities_finalized())
+		preempt_schedule_irq();
+}
+
 static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 3696dbcbfa80..a04b152696cf 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -18,7 +18,6 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/kernel.h>
-#include <linux/lockdep.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/nospec.h>
@@ -702,22 +701,6 @@ static int __init tagged_addr_init(void)
 core_initcall(tagged_addr_init);
 #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
 
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
-{
-	lockdep_assert_irqs_disabled();
-
-	/*
-	 * Preempting a task from an IRQ means we leave copies of PSTATE
-	 * on the stack. cpufeature's enable calls may modify PSTATE, but
-	 * resuming one of these preempted tasks would undo those changes.
-	 *
-	 * Only allow a task to be preempted once cpufeatures have been
-	 * enabled.
-	 */
-	if (system_capabilities_finalized())
-		preempt_schedule_irq();
-}
-
 #ifdef CONFIG_BINFMT_ELF
 int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
 			 bool has_interp, bool is_interp)
From: Mark Rutland <mark.rutland@arm.com>
commit f8049488e7d37b0a0e438ee449e83b3e46958743 upstream.
When handling IRQ/FIQ exceptions the entry assembly may transition from a task's stack to a CPU's IRQ stack (and IRQ shadow call stack).
In subsequent patches we want to migrate the IRQ/FIQ triage logic to C, and as we want to perform some actions on the task stack (e.g. EL1 preemption), we need to switch stacks within the C handler. So that we can do so, this patch adds a helper to call a function on a CPU's IRQ stack (and shadow stack as appropriate).
Subsequent patches will make use of the new helper function.
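In sketch form, the intended C-side usage (mirroring the do_interrupt_handler()
added by a later patch in this series) is to hop to the IRQ stack only when
still running on the task stack:

/* Sketch of the caller, as added later in this series. */
static void do_interrupt_handler(struct pt_regs *regs,
				 void (*handler)(struct pt_regs *))
{
	if (on_thread_stack())
		call_on_irq_stack(regs, handler); /* task stack -> per-CPU IRQ stack */
	else
		handler(regs);                    /* already off the task stack */
}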
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-7-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
---
 arch/arm64/include/asm/exception.h |  2 ++
 arch/arm64/kernel/entry.S          | 36 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index cc5b2203d876..def53267f4a2 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -28,6 +28,8 @@ static inline u32 disr_to_esr(u64 disr)
 
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+				  void (*func)(struct pt_regs *));
 asmlinkage void enter_from_user_mode(void);
 asmlinkage void exit_to_user_mode(void);
 void arm64_enter_nmi(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 020a455824be..8b17d83e8c87 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1043,6 +1043,42 @@ SYM_CODE_START(ret_from_fork)
 SYM_CODE_END(ret_from_fork)
 NOKPROBE(ret_from_fork)
 
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ *			  void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using this CPU's irq stack and shadow irq stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+#ifdef CONFIG_SHADOW_CALL_STACK
+	stp	scs_sp, xzr, [sp, #-16]!
+	adr_this_cpu scs_sp, irq_shadow_call_stack, x17
+#endif
+	/* Create a frame record to save our LR and SP (implicit in FP) */
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+
+	ldr_this_cpu x16, irq_stack_ptr, x17
+	mov	x15, #IRQ_STACK_SIZE
+	add	x16, x16, x15
+
+	/* Move to the new stack and call the function there */
+	mov	sp, x16
+	blr	x1
+
+	/*
+	 * Restore the SP from the FP, and restore the FP and LR from the frame
+	 * record.
+	 */
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16
+#ifdef CONFIG_SHADOW_CALL_STACK
+	ldp	scs_sp, xzr, [sp], #16
+#endif
+	ret
+SYM_FUNC_END(call_on_irq_stack)
+NOKPROBE(call_on_irq_stack)
+
 #ifdef CONFIG_ARM_SDE_INTERFACE
#include <asm/sdei.h>
From: Mark Rutland <mark.rutland@arm.com>
commit 064dbfb4169141943ec7d9dbfd02974dd008f2ce upstream.
For various reasons we'd like to convert the bulk of arm64's exception triage logic to C. As a step towards that, this patch converts the EL1 and EL0 IRQ+FIQ triage logic to C.
Separate C functions are added for the native and compat cases so that in subsequent patches we can handle native/compat differences in C.
Since the triage functions can now call arm64_apply_bp_hardening() directly, the do_el0_irq_bp_hardening() wrapper function is removed.
Since the user_exit_irqoff macro is now unused, it is removed. The user_enter_irqoff macro is still used by the ret_to_user code, and cannot be removed at this time.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-8-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
---
 arch/arm64/include/asm/exception.h |  3 ++
 arch/arm64/kernel/entry-common.c   | 68 +++++++++++++++++++++++++++++-
 arch/arm64/kernel/entry.S          | 54 +++---------------------
 3 files changed, 77 insertions(+), 48 deletions(-)
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index def53267f4a2..846ca8132f76 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -26,6 +26,9 @@ static inline u32 disr_to_esr(u64 disr)
 	return esr;
 }
 
+asmlinkage void el1_irq_handler(struct pt_regs *regs);
+asmlinkage void el0_irq_handler(struct pt_regs *regs);
+asmlinkage void el0_irq_compat_handler(struct pt_regs *regs);
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index a533f4827253..340249c65007 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -19,6 +19,8 @@
 #include <asm/exception.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
 #include <asm/sysreg.h>
 
 /*
@@ -113,7 +115,7 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 	exit_to_kernel_mode(regs);
 }
 
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
+static void __sched arm64_preempt_schedule_irq(void)
 {
 	lockdep_assert_irqs_disabled();
 
@@ -141,6 +143,42 @@ static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 	exit_to_kernel_mode(regs);
 }
 
+static void do_interrupt_handler(struct pt_regs *regs,
+				 void (*handler)(struct pt_regs *))
+{
+	if (on_thread_stack())
+		call_on_irq_stack(regs, handler);
+	else
+		handler(regs);
+}
+
+extern void (*handle_arch_irq)(struct pt_regs *);
+
+static void noinstr el1_interrupt(struct pt_regs *regs,
+				  void (*handler)(struct pt_regs *))
+{
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+	enter_el1_irq_or_nmi(regs);
+	do_interrupt_handler(regs, handler);
+
+	/*
+	 * Note: thread_info::preempt_count includes both thread_info::count
+	 * and thread_info::need_resched, and is not equivalent to
+	 * preempt_count().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPTION) &&
+	    READ_ONCE(current_thread_info()->preempt_count) == 0)
+		arm64_preempt_schedule_irq();
+
+	exit_el1_irq_or_nmi(regs);
+}
+
+asmlinkage void noinstr el1_irq_handler(struct pt_regs *regs)
+{
+	el1_interrupt(regs, handle_arch_irq);
+}
+
 static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
@@ -445,6 +483,29 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 	}
 }
 
+static void noinstr el0_interrupt(struct pt_regs *regs,
+				  void (*handler)(struct pt_regs *))
+{
+	enter_from_user_mode();
+
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+	if (regs->pc & BIT(55))
+		arm64_apply_bp_hardening();
+
+	do_interrupt_handler(regs, handler);
+}
+
+static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
+{
+	el0_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el0_irq_handler(struct pt_regs *regs)
+{
+	__el0_irq_handler_common(regs);
+}
+
 #ifdef CONFIG_COMPAT
 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
@@ -453,6 +514,11 @@ static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 	do_el0_cp15(esr, regs);
 }
 
+asmlinkage void noinstr el0_irq_compat_handler(struct pt_regs *regs)
+{
+	__el0_irq_handler_common(regs);
+}
+
 static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
 	enter_from_user_mode();
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8b17d83e8c87..b0d102669dde 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -429,49 +429,6 @@ SYM_CODE_START_LOCAL(__swpan_exit_el0)
 SYM_CODE_END(__swpan_exit_el0)
 #endif
 
-	.macro	irq_stack_entry
-	mov	x19, sp			// preserve the original sp
-#ifdef CONFIG_SHADOW_CALL_STACK
-	mov	x24, scs_sp		// preserve the original shadow stack
-#endif
-
-	/*
-	 * Compare sp with the base of the task stack.
-	 * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
-	 * and should switch to the irq stack.
-	 */
-	ldr	x25, [tsk, TSK_STACK]
-	eor	x25, x25, x19
-	and	x25, x25, #~(THREAD_SIZE - 1)
-	cbnz	x25, 9998f
-
-	ldr_this_cpu x25, irq_stack_ptr, x26
-	mov	x26, #IRQ_STACK_SIZE
-	add	x26, x25, x26
-
-	/* switch to the irq stack */
-	mov	sp, x26
-
-#ifdef CONFIG_SHADOW_CALL_STACK
-	/* also switch to the irq shadow stack */
-	adr_this_cpu scs_sp, irq_shadow_call_stack, x26
-#endif
-
-9998:
-	.endm
-
-	/*
-	 * The callee-saved regs (x19-x29) should be preserved between
-	 * irq_stack_entry and irq_stack_exit, but note that kernel_entry
-	 * uses x20-x23 to store data for later use.
-	 */
-	.macro	irq_stack_exit
-	mov	sp, x19
-#ifdef CONFIG_SHADOW_CALL_STACK
-	mov	scs_sp, x24
-#endif
-	.endm
-
 /* GPRs used by entry code */
 tsk	.req	x28		// current thread_info
 
@@ -675,7 +632,8 @@ SYM_CODE_END(el1_sync)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
 	kernel_entry 1
-	el1_interrupt_handler handle_arch_irq
+	mov	x0, sp
+	bl	el1_irq_handler
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
 
@@ -702,7 +660,9 @@ SYM_CODE_END(el0_sync_compat)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
 	kernel_entry 0, 32
-	b	el0_irq_naked
+	mov	x0, sp
+	bl	el0_irq_compat_handler
+	b	ret_to_user
 SYM_CODE_END(el0_irq_compat)
 
 SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
@@ -714,8 +674,8 @@ SYM_CODE_END(el0_error_compat)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
 	kernel_entry 0
-el0_irq_naked:
-	el0_interrupt_handler handle_arch_irq
+	mov	x0, sp
+	bl	el0_irq_handler
 	b	ret_to_user
 SYM_CODE_END(el0_irq)
From: Ard Biesheuvel <ardb@kernel.org>
commit 59b37fe52f49955791a460752c37145f1afdcad1 upstream.
Instead of reloading the shadow call stack pointer from the ordinary stack, which may be vulnerable to the kind of gadget based attacks shadow call stacks were designed to prevent, let's store a task's shadow call stack pointer in the task struct when switching to the shadow IRQ stack.
Given that currently, the task_struct::scs_sp field is only used to preserve the shadow call stack pointer while a task is scheduled out or running in user space, reusing this field to preserve and restore it while running off the IRQ stack must be safe, as those occurrences are guaranteed to never overlap. (The stack switching logic only switches stacks when running from the task stack, and so the value being saved here always corresponds to the task mode shadow stack)
While at it, fold a mov/add/mov sequence into a single add.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230109174800.3286265-3-ardb@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
---
 arch/arm64/kernel/entry.S | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0d102669dde..e35f3cec74d9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1011,19 +1011,19 @@ NOKPROBE(ret_from_fork)
  */
 SYM_FUNC_START(call_on_irq_stack)
 #ifdef CONFIG_SHADOW_CALL_STACK
-	stp	scs_sp, xzr, [sp, #-16]!
+	get_current_task x16
+	scs_save x16
 	adr_this_cpu scs_sp, irq_shadow_call_stack, x17
 #endif
+
 	/* Create a frame record to save our LR and SP (implicit in FP) */
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
 
 	ldr_this_cpu x16, irq_stack_ptr, x17
-	mov	x15, #IRQ_STACK_SIZE
-	add	x16, x16, x15
 
 	/* Move to the new stack and call the function there */
-	mov	sp, x16
+	add	sp, x16, #IRQ_STACK_SIZE
 	blr	x1
 
 	/*
@@ -1032,9 +1032,7 @@ SYM_FUNC_START(call_on_irq_stack)
 	 */
 	mov	sp, x29
 	ldp	x29, x30, [sp], #16
-#ifdef CONFIG_SHADOW_CALL_STACK
-	ldp	scs_sp, xzr, [sp], #16
-#endif
+	scs_load_current
 	ret
 SYM_FUNC_END(call_on_irq_stack)
 NOKPROBE(call_on_irq_stack)
On Sun, 18 Feb 2024 at 03:33, Xiang Yang <xiangyang3@huawei.com> wrote:
> The shadow call stack for irq, now stored in the current task's thread
> info, may be restored incorrectly, so backport call_on_irq_stack from
> mainline to fix it.
>
> Ard Biesheuvel (1):
>   arm64: Stash shadow stack pointer in the task struct on interrupt
>
> Mark Rutland (3):
>   arm64: entry: move arm64_preempt_schedule_irq to entry-common.c
>   arm64: entry: add a call_on_irq_stack helper
>   arm64: entry: convert IRQ+FIQ handlers to C
>
> Xiang Yang (1):
>   Revert "arm64: Stash shadow stack pointer in the task struct on interrupt"
Backporting this was a mistake. Not only was the backport flawed, the original issue (stashing the shadow call stack pointer onto the normal stack) was not even present, at least not to the same extent.
Stashing the shadow call stack pointer in register X24 works around the original issue, except for the case where a hardirq is taken while softirqs are being processed. In this case, X24 will be preserved on the stack by the hardirq handling logic, and restored after. Theoretically, that creates a window where the shadow call stack pointer could be corrupted deliberately, but it seems unlikely to me that this is exploitable in practice.
So in the light of this, I think doing only the revert here should be sufficient, and there is no need for the other backports in this series.
Thanks, I will send a v2 with the revert patch.
On 2024/2/18 18:06, Ard Biesheuvel wrote:
> On Sun, 18 Feb 2024 at 03:33, Xiang Yang <xiangyang3@huawei.com> wrote:
>>
>> The shadow call stack for irq, now stored in the current task's thread
>> info, may be restored incorrectly, so backport call_on_irq_stack from
>> mainline to fix it.
>>
>> Ard Biesheuvel (1):
>>   arm64: Stash shadow stack pointer in the task struct on interrupt
>>
>> Mark Rutland (3):
>>   arm64: entry: move arm64_preempt_schedule_irq to entry-common.c
>>   arm64: entry: add a call_on_irq_stack helper
>>   arm64: entry: convert IRQ+FIQ handlers to C
>>
>> Xiang Yang (1):
>>   Revert "arm64: Stash shadow stack pointer in the task struct on interrupt"
>
> Backporting this was a mistake. Not only was the backport flawed, the
> original issue (stashing the shadow call stack pointer onto the normal
> stack) was not even present, at least not to the same extent.
>
> Stashing the shadow call stack pointer in register X24 works around the
> original issue, except for the case where a hardirq is taken while
> softirqs are being processed. In this case, X24 will be preserved on the
> stack by the hardirq handling logic, and restored after. Theoretically,
> that creates a window where the shadow call stack pointer could be
> corrupted deliberately, but it seems unlikely to me that this is
> exploitable in practice.
>
> So in the light of this, I think doing only the revert here should be
> sufficient, and there is no need for the other backports in this series.