In some architectural corner cases, AT instructions can generate an exception, which KVM is not really ready to handle properly. Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v4.19(.142). James prepared and tested these already, but we were lacking the upstream commit IDs so far. I am sending this on his behalf, since he is off this week.
The original patches contained stable tags, but with a prerequisite patch in v5.3. Patch 2/4 is a backport of this one, patches 1/4 and 3/4 needed some massaging to apply and work on 4.19.
Cheers, Andre.
James Morse (4): KVM: arm64: Add kvm_extable for vaxorcism code KVM: arm64: Defer guest entry when an asynchronous exception is pending KVM: arm64: Survive synchronous exceptions caused by AT instructions KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_asm.h | 43 ++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 8 ++++ arch/arm64/kvm/hyp/entry.S | 31 +++++++++++++--- arch/arm64/kvm/hyp/hyp-entry.S | 63 +++++++++++++++++++++----------- arch/arm64/kvm/hyp/switch.c | 39 ++++++++++++++++++-- 6 files changed, 154 insertions(+), 33 deletions(-)
From: James Morse james.morse@arm.com
commit e9ee186bb735bfc17fa81dbc9aebf268aee5b41e upstream.
KVM has a one instruction window where it will allow an SError exception to be consumed by the hypervisor without treating it as a hypervisor bug. This is used to consume asynchronous external abort that were caused by the guest.
As we are about to add another location that survives unexpected exceptions, generalise this code to make it behave like the host's extable.
KVM's version has to be mapped to EL2 to be accessible on nVHE systems.
The SError vaxorcism code is a one instruction window, so has two entries in the extable. Because the KVM code is copied for VHE and nVHE, we end up with four entries, half of which correspond with code that isn't mapped.
Cc: stable@vger.kernel.org # v4.19 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_asm.h | 15 ++++++++++ arch/arm64/kernel/vmlinux.lds.S | 8 +++++ arch/arm64/kvm/hyp/entry.S | 16 ++++++---- arch/arm64/kvm/hyp/hyp-entry.S | 51 +++++++++++++++++++------------- arch/arm64/kvm/hyp/switch.c | 31 +++++++++++++++++++ 5 files changed, 95 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 102b5a5c47b6..ed701f5c2d9a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -111,6 +111,21 @@ extern u32 __init_stage2_translation(void); kern_hyp_va \vcpu .endm
+/* + * KVM extable for unexpected exceptions. + * In the same format _asm_extable, but output to a different section so that + * it can be mapped to EL2. The KVM version is not sorted. The caller must + * ensure: + * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented + * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. + */ +.macro _kvm_extable, from, to + .pushsection __kvm_ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection +.endm + #endif
#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d6050c6e65bc..69e7c8d4a00f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -24,6 +24,13 @@ ENTRY(_text)
jiffies = jiffies_64;
+ +#define HYPERVISOR_EXTABLE \ + . = ALIGN(SZ_8); \ + __start___kvm_ex_table = .; \ + *(__kvm_ex_table) \ + __stop___kvm_ex_table = .; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -39,6 +46,7 @@ jiffies = jiffies_64; __hyp_idmap_text_end = .; \ __hyp_text_start = .; \ *(.hyp.text) \ + HYPERVISOR_EXTABLE \ __hyp_text_end = .;
#define IDMAP_TEXT \ diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fad1e164fe48..c21d279a5aab 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -148,18 +148,22 @@ alternative_endif // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. - .global abort_guest_exit_start abort_guest_exit_start:
isb
- .global abort_guest_exit_end abort_guest_exit_end: + msr daifset, #4 // Mask aborts + ret
- // If the exception took place, restore the EL1 exception - // context so that we can report some information. - // Merge the exception code with the SError pending bit. - tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + _kvm_extable abort_guest_exit_start, 9997f + _kvm_extable abort_guest_exit_end, 9997f +9997: + msr daifset, #4 // Mask aborts + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) + + // restore the EL1 exception context so that we can report some + // information. Merge the exception code with the SError pending bit. msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 24b4fbafe3e4..68c73c5d780c 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -26,6 +26,30 @@ #include <asm/kvm_mmu.h> #include <asm/mmu.h>
+.macro save_caller_saved_regs_vect + /* x0 and x1 were saved in the vector entry */ + stp x2, x3, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x16, x17, [sp, #-16]! +.endm + +.macro restore_caller_saved_regs_vect + ldp x16, x17, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + .text .pushsection .hyp.text, "ax"
@@ -163,27 +187,14 @@ el1_error: b __guest_exit
el2_error: - ldp x0, x1, [sp], #16 + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + + bl kvm_unexpected_el2_exception + + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect
- /* - * Only two possibilities: - * 1) Either we come from the exit path, having just unmasked - * PSTATE.A: change the return code to an EL2 fault, and - * carry on, as we're already in a sane state to handle it. - * 2) Or we come from anywhere else, and that's a bug: we panic. - * - * For (1), x0 contains the original return code and x1 doesn't - * contain anything meaningful at that stage. We can reuse them - * as temp registers. - * For (2), who cares? - */ - mrs x0, elr_el2 - adr x1, abort_guest_exit_start - cmp x0, x1 - adr x1, abort_guest_exit_end - ccmp x0, x1, #4, ne - b.ne __hyp_panic - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) eret
ENTRY(__hyp_do_panic) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index f3978931aaf4..b5f095c6aa4b 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -23,6 +23,7 @@ #include <kvm/arm_psci.h>
#include <asm/cpufeature.h> +#include <asm/extable.h> #include <asm/kprobes.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> @@ -34,6 +35,9 @@ #include <asm/processor.h> #include <asm/thread_info.h>
+extern struct exception_table_entry __start___kvm_ex_table; +extern struct exception_table_entry __stop___kvm_ex_table; + /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { @@ -663,3 +667,30 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
unreachable(); } + +asmlinkage void __hyp_text kvm_unexpected_el2_exception(void) +{ + unsigned long addr, fixup; + struct kvm_cpu_context *host_ctxt; + struct exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = hyp_symbol_addr(__start___kvm_ex_table); + end = hyp_symbol_addr(__stop___kvm_ex_table); + host_ctxt = __hyp_this_cpu_ptr(kvm_host_cpu_state); + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + hyp_panic(host_ctxt); +}
From: James Morse james.morse@arm.com
commit 5dcd0fdbb492d49dac6bf21c436dfcb5ded0a895 upstream.
SError that occur during world-switch's entry to the guest will be accounted to the guest, as the exception is masked until we enter the guest... but we want to attribute the SError as precisely as possible.
Reading DISR_EL1 before guest entry requires free registers, and using ESB+DISR_EL1 to consume and read back the ESR would leave KVM holding a host SError... We would rather leave the SError pending and let the host take it once we exit world-switch. To do this, we need to defer guest-entry if an SError is pending.
Read the ISR to see if SError (or an IRQ) is pending. If so fake an exit. Place this check between __guest_enter()'s save of the host registers, and restore of the guest's. SError that occur between here and the eret into the guest must have affected the guest's registers, which we can naturally attribute to the guest.
The dsb is needed to ensure any previous writes have been done before we read ISR_EL1. On systems without the v8.2 RAS extensions this doesn't give us anything as we can't contain errors, and the ESR bits to describe the severity are all implementation-defined. Replace this with a nop for these systems.
Cc: stable@vger.kernel.org # v4.19 Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/kvm/hyp/entry.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index c21d279a5aab..fc83e932afbe 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -17,6 +17,7 @@
#include <linux/linkage.h>
+#include <asm/alternative.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/fpsimdmacros.h> @@ -62,6 +63,20 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1
+ // Now the host state is stored if we have a pending RAS SError it must + // affect the host. If any asynchronous exception is pending we defer + // the guest entry. The DSB isn't necessary before v8.2 as any SError + // would be fatal. +alternative_if ARM64_HAS_RAS_EXTN + dsb nshst + isb +alternative_else_nop_endif + mrs x1, isr_el1 + cbz x1, 1f + mov x0, #ARM_EXCEPTION_IRQ + ret + +1: add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
From: James Morse james.morse@arm.com
commit 88a84ccccb3966bcc3f309cdb76092a9892c0260 upstream.
KVM doesn't expect any synchronous exceptions when executing, any such exception leads to a panic(). AT instructions access the guest page tables, and can cause a synchronous external abort to be taken.
The arm-arm is unclear on what should happen if the guest has configured the hardware update of the access-flag, and a memory type in TCR_EL1 that does not support atomic operations. B2.2.6 "Possible implementation restrictions on using atomic instructions" from DDI0487F.a lists synchronous external abort as a possible behaviour of atomic instructions that target memory that isn't writeback cacheable, but the page table walker may behave differently.
Make KVM robust to synchronous exceptions caused by AT instructions. Add a get_user() style helper for AT instructions that returns -EFAULT if an exception was generated.
While KVM's version of the exception table mixes synchronous and asynchronous exceptions, only one of these can occur at each location.
Re-enter the guest when the AT instructions take an exception on the assumption the guest will take the same exception. This isn't guaranteed to make forward progress, as the AT instructions may always walk the page tables, but guest execution may use the translation cached in the TLB.
This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running.
Cc: stable@vger.kernel.org # v4.19 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_asm.h | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++-- arch/arm64/kvm/hyp/switch.c | 8 ++++---- 3 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ed701f5c2d9a..e3c0dba5bdde 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -87,6 +87,34 @@ extern u32 __init_stage2_translation(void); *__hyp_this_cpu_ptr(sym); \ })
+#define __KVM_EXTABLE(from, to) \ + " .pushsection __kvm_ex_table, "a"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + + +#define __kvm_at(at_op, addr) \ +( { \ + int __kvm_at_err = 0; \ + u64 spsr, elr; \ + asm volatile( \ + " mrs %1, spsr_el2\n" \ + " mrs %2, elr_el2\n" \ + "1: at "at_op", %3\n" \ + " isb\n" \ + " b 9f\n" \ + "2: msr spsr_el2, %1\n" \ + " msr elr_el2, %2\n" \ + " mov %w0, %4\n" \ + "9:\n" \ + __KVM_EXTABLE(1b, 2b) \ + : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ + : "r" (addr), "i" (-EFAULT)); \ + __kvm_at_err; \ +} ) + + #else /* __ASSEMBLY__ */
.macro hyp_adr_this_cpu reg, sym, tmp diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 68c73c5d780c..ea063312bca1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -186,6 +186,15 @@ el1_error: mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit
+el2_sync: + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + bl kvm_unexpected_el2_exception + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect + + eret + el2_error: save_caller_saved_regs_vect stp x29, x30, [sp, #-16]! @@ -223,7 +232,6 @@ ENDPROC(\label) invalid_vector el2t_irq_invalid invalid_vector el2t_fiq_invalid invalid_vector el2t_error_invalid - invalid_vector el2h_sync_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid invalid_vector el1_fiq_invalid @@ -251,7 +259,7 @@ ENTRY(__kvm_hyp_vector) invalid_vect el2t_fiq_invalid // FIQ EL2t invalid_vect el2t_error_invalid // Error EL2t
- invalid_vect el2h_sync_invalid // Synchronous EL2h + valid_vect el2_sync // Synchronous EL2h invalid_vect el2h_irq_invalid // IRQ EL2h invalid_vect el2h_fiq_invalid // FIQ EL2h valid_vect el2_error // Error EL2h diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b5f095c6aa4b..484d00984c2e 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -268,10 +268,10 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) * saved the guest context yet, and we may return early... */ par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg(par_el1); + else + tmp = 1; /* back to the guest */ write_sysreg(par, par_el1);
if (unlikely(tmp & 1))
From: James Morse james.morse@arm.com
commit 71a7f8cb1ca4ca7214a700b1243626759b6c11d4 upstream.
AT instructions do a translation table walk and return the result, or the fault in PAR_EL1. KVM uses these to find the IPA when the value is not provided by the CPU in HPFAR_EL1.
If a translation table walk causes an external abort it is taken as an exception, even if it was due to an AT instruction. (DDI0487F.a's D5.2.11 "Synchronous faults generated by address translation instructions")
While we previously made KVM resilient to exceptions taken due to AT instructions, the device access causes mismatched attributes, and may occur speculatively. Prevent this, by forbidding a walk through memory described as device at stage2. Now such AT instructions will report a stage2 fault.
Such a fault will cause KVM to restart the guest. If the AT instructions always walk the page tables, but guest execution uses the translation cached in the TLB, the guest can't make forward progress until the TLB entry is evicted. This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running.
Cc: stable@vger.kernel.org # v4.19 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8b284cbf8162..a3b6f58d188c 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -83,11 +83,12 @@ * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate + * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO) + HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
On Wed, Sep 02, 2020 at 11:08:17AM +0100, Andre Przywara wrote:
In some architectural corner cases, AT instructions can generate an exception, which KVM is not really ready to handle properly. Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v4.19(.142). James prepared and tested these already, but we were lacking the upstream commit IDs so far. I am sending this on his behalf, since he is off this week.
The original patches contained stable tags, but with a prerequisite patch in v5.3. Patch 2/4 is a backport of this one, patches 1/4 and 3/4 needed some massaging to apply and work on 4.19.
Now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org