From: Pawan Gupta pawan.kumar.gupta@linux.intel.com
[ Upstream commit baf8361e54550a48a7087b603313ad013cc13386 ]
MDS mitigation requires clearing the CPU buffers before returning to user. This needs to be done late in the exit-to-user path. Current location of VERW leaves a possibility of kernel data ending up in CPU buffers for memory accesses done after VERW such as:
1. Kernel data accessed by an NMI between VERW and return-to-user can remain in CPU buffers since NMI returning to kernel does not execute VERW to clear CPU buffers. 2. Alyssa reported that after VERW is executed, CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system call. Memory accesses during stack scrubbing can move kernel stack contents into CPU buffers. 3. When caller saved registers are restored after a return from function executing VERW, the kernel stack accesses can remain in CPU buffers(since they occur after VERW).
To fix this VERW needs to be moved very late in exit-to-user path.
In preparation for moving VERW to entry/exit asm code, create macros that can be used in asm. Also make VERW patching depend on a new feature flag X86_FEATURE_CLEAR_CPU_BUF.
Reported-by: Alyssa Milburn alyssa.milburn@intel.com Suggested-by: Andrew Cooper andrew.cooper3@citrix.com Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Pawan Gupta pawan.kumar.gupta@linux.intel.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.in... Signed-off-by: Nikolay Borisov nik.borisov@suse.com --- arch/x86/entry/Makefile | 2 +- arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 14 ++++++++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 arch/x86/entry/entry.S
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 06fc70cf5433..b8da38e81e96 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -7,7 +7,7 @@ OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o
obj-y += vdso/ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000000..21bf05354670 --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,23 @@ +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/segment.h> +#include <asm/cache.h> + + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel) +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f42286e9a2b1..6d024db8384c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* FREE! ( 3*32+17) */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+17) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c8819358a332..ba069ed16f94 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -175,6 +175,18 @@ .Lskip_rsb_@: .endm
+/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_@: +.endm #else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \ @@ -346,6 +358,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel; + #include <asm/segment.h>
/**