From: Ard Biesheuvel ardb@kernel.org
Please merge the attached series into stable branches v6.6 and v6.8. They backport changes that are part of the work to harden the EFI stub and make it compatible with MS requirements on EFI memory protections on secure boot enabled systems.
Note that the first patch by Hou Wenlong is already in v6.8. The remaining ones should apply equally to v6.6 and v6.8. Only patch #5 was tweaked for context changes due to backports that overtook this one.
Thanks.
Ard Biesheuvel (5): efi/libstub: Add generic support for parsing mem_encrypt= x86/boot: Move mem_encrypt= parsing to the decompressor x86/sme: Move early SME kernel encryption handling into .head.text x86/sev: Move early startup code into .head.text section x86/efistub: Remap kernel text read-only before dropping NX attribute
Hou Wenlong (1): x86/head/64: Move the __head definition to <asm/init.h>
arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 16 +++++ arch/x86/boot/compressed/sev.c | 3 + arch/x86/include/asm/boot.h | 1 + arch/x86/include/asm/init.h | 2 + arch/x86/include/asm/mem_encrypt.h | 8 +-- arch/x86/include/asm/sev.h | 10 +-- arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/kernel/head64.c | 3 +- arch/x86/kernel/sev-shared.c | 23 +++--- arch/x86/kernel/sev.c | 14 ++-- arch/x86/lib/Makefile | 13 ---- arch/x86/mm/mem_encrypt_identity.c | 74 ++++++-------------- drivers/firmware/efi/libstub/efi-stub-helper.c | 8 +++ drivers/firmware/efi/libstub/efistub.h | 2 +- drivers/firmware/efi/libstub/x86-stub.c | 14 +++- 16 files changed, 94 insertions(+), 100 deletions(-)
From: Hou Wenlong houwenlong.hwl@antgroup.com
[ Commit d2a285d65bfde3218fd0c3b88794d0135ced680b upstream ]
Move the __head section definition to a header to widen its use.
An upcoming patch will mark the code as __head in mem_encrypt_identity.c too.
Signed-off-by: Hou Wenlong houwenlong.hwl@antgroup.com Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/0583f57977be184689c373fe540cbd7d85ca2047.169752540... Signed-off-by: Ard Biesheuvel ardb@kernel.org --- arch/x86/include/asm/init.h | 2 ++ arch/x86/kernel/head64.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421f68..cc9ccf61b6bd 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H
+#define __head __section(".head.text") + struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bbc21798df10..c58213bce294 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -41,6 +41,7 @@ #include <asm/trapnr.h> #include <asm/sev.h> #include <asm/tdx.h> +#include <asm/init.h>
/* * Manage page tables very early on. @@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { .address = 0, };
-#define __head __section(".head.text") - static void __head *fixup_pointer(void *ptr, unsigned long physaddr) { return ptr - (void *)_text + (void *)physaddr;
From: Ard Biesheuvel ardb@kernel.org
[ Commit 7205f06e847422b66c1506eee01b9998ffc75d76 upstream ]
Parse the mem_encrypt= command line parameter from the EFI stub if CONFIG_ARCH_HAS_MEM_ENCRYPT=y, so that it can be passed to the early boot code by the arch code in the stub.
This avoids the need for the core kernel to do any string parsing very early in the boot.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-16-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org --- drivers/firmware/efi/libstub/efi-stub-helper.c | 8 ++++++++ drivers/firmware/efi/libstub/efistub.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index bfa30625f5d0..3dc2f9aaf08d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -24,6 +24,8 @@ static bool efi_noinitrd; static bool efi_nosoftreserve; static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
+int efi_mem_encrypt; + bool __pure __efi_soft_reserve_enabled(void) { return !efi_nosoftreserve; @@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline) efi_noinitrd = true; } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { efi_no5lvl = true; + } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && + !strcmp(param, "mem_encrypt") && val) { + if (parse_option_str(val, "on")) + efi_mem_encrypt = 1; + else if (parse_option_str(val, "off")) + efi_mem_encrypt = -1; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index c04b82ea40f2..fc18fd649ed7 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -37,8 +37,8 @@ extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; +extern int efi_mem_encrypt; extern bool efi_novamap; - extern const efi_system_table_t *efi_system_table;
typedef union efi_dxe_services_table efi_dxe_services_table_t;
From: Ard Biesheuvel ardb@kernel.org
[ Commit cd0d9d92c8bb46e77de62efd7df13069ddd61e7d upstream ]
The early SME/SEV code parses the command line very early, in order to decide whether or not memory encryption should be enabled, which needs to occur even before the initial page tables are created.
This is problematic for a number of reasons: - this early code runs from the 1:1 mapping provided by the decompressor or firmware, which uses a different translation than the one assumed by the linker, and so the code needs to be built in a special way; - parsing external input while the entire kernel image is still mapped writable is a bad idea in general, and really does not belong in security minded code; - the current code ignores the built-in command line entirely (although this appears to be the case for the entire decompressor)
Given that the decompressor/EFI stub is an intrinsic part of the x86 bootable kernel image, move the command line parsing there and out of the core kernel. This removes the need to build lib/cmdline.o in a special way, or to use RIP-relative LEA instructions in inline asm blocks.
This involves a new xloadflag in the setup header to indicate that mem_encrypt=on appeared on the kernel command line.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-17-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org --- arch/x86/boot/compressed/misc.c | 15 +++++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/lib/Makefile | 13 -------- arch/x86/mm/mem_encrypt_identity.c | 32 ++------------------ drivers/firmware/efi/libstub/x86-stub.c | 3 ++ 5 files changed, 22 insertions(+), 42 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f711f2a85862..c6136a1be283 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -357,6 +357,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, return entry; }
+/* + * Set the memory encryption xloadflag based on the mem_encrypt= command line + * parameter, if provided. + */ +static void parse_mem_encrypt(struct setup_header *hdr) +{ + int on = cmdline_find_option_bool("mem_encrypt=on"); + int off = cmdline_find_option_bool("mem_encrypt=off"); + + if (on > off) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; +} + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -387,6 +400,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG;
+ parse_mem_encrypt(&boot_params->hdr); + sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) { diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 01d19fc22346..eeea058cf602 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -38,6 +38,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) #define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7)
#ifndef __ASSEMBLY__
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index ea3a28e7b613..f0dae4fb6d07 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) endif
-# Early boot use of cmdline; don't instrument it -ifdef CONFIG_AMD_MEM_ENCRYPT -KCOV_INSTRUMENT_cmdline.o := n -KASAN_SANITIZE_cmdline.o := n -KCSAN_SANITIZE_cmdline.o := n - -ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_cmdline.o = -pg -endif - -CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables -endif - inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 0166ab1780cc..d210c7fc8fa2 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -43,7 +43,6 @@
#include <asm/setup.h> #include <asm/sections.h> -#include <asm/cmdline.h> #include <asm/coco.h> #include <asm/sev.h>
@@ -95,9 +94,6 @@ struct sme_populate_pgd_data { */ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
-static char sme_cmdline_arg[] __initdata = "mem_encrypt"; -static char sme_cmdline_on[] __initdata = "on"; - static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; @@ -504,11 +500,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
void __init sme_enable(struct boot_params *bp) { - const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; unsigned long me_mask; - char buffer[16]; bool snp; u64 msr;
@@ -551,6 +545,9 @@ void __init sme_enable(struct boot_params *bp)
/* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { + if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION)) + return; + /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a @@ -570,31 +567,8 @@ void __init sme_enable(struct boot_params *bp) msr = __rdmsr(MSR_AMD64_SYSCFG); if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; - } else { - /* SEV state cannot be controlled by a command line option */ - goto out; }
- /* - * Fixups have not been applied to phys_base yet and we're running - * identity mapped, so we must obtain the address to the SME command - * line argument data using rip-relative addressing. - */ - asm ("lea sme_cmdline_arg(%%rip), %0" - : "=r" (cmdline_arg) - : "p" (sme_cmdline_arg)); - asm ("lea sme_cmdline_on(%%rip), %0" - : "=r" (cmdline_on) - : "p" (sme_cmdline_on)); - - cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | - ((u64)bp->ext_cmd_line_ptr << 32)); - - if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || - strncmp(buffer, cmdline_on, sizeof(buffer))) - return; - -out: RIP_REL_REF(sme_me_mask) = me_mask; physical_mask &= ~me_mask; cc_vendor = CC_VENDOR_AMD; diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 8307950fe3ce..6224f5c40532 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -888,6 +888,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, } }
+ if (efi_mem_encrypt > 0) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; + status = efi_decompress_kernel(&kernel_entry); if (status != EFI_SUCCESS) { efi_err("Failed to decompress kernel\n");
On Mon, Apr 08, 2024 at 08:49:21AM +0200, Ard Biesheuvel wrote:
From: Ard Biesheuvel ardb@kernel.org
[ Commit cd0d9d92c8bb46e77de62efd7df13069ddd61e7d upstream ]
The early SME/SEV code parses the command line very early, in order to decide whether or not memory encryption should be enabled, which needs to occur even before the initial page tables are created.
This is problematic for a number of reasons:
- this early code runs from the 1:1 mapping provided by the decompressor or firmware, which uses a different translation than the one assumed by the linker, and so the code needs to be built in a special way;
- parsing external input while the entire kernel image is still mapped writable is a bad idea in general, and really does not belong in security minded code;
- the current code ignores the built-in command line entirely (although this appears to be the case for the entire decompressor)
Given that the decompressor/EFI stub is an intrinsic part of the x86 bootable kernel image, move the command line parsing there and out of the core kernel. This removes the need to build lib/cmdline.o in a special way, or to use RIP-relative LEA instructions in inline asm blocks.
This involves a new xloadflag in the setup header to indicate that mem_encrypt=on appeared on the kernel command line.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-17-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org
arch/x86/boot/compressed/misc.c | 15 +++++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/lib/Makefile | 13 -------- arch/x86/mm/mem_encrypt_identity.c | 32 ++------------------ drivers/firmware/efi/libstub/x86-stub.c | 3 ++ 5 files changed, 22 insertions(+), 42 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f711f2a85862..c6136a1be283 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -357,6 +357,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, return entry; } +/*
- Set the memory encryption xloadflag based on the mem_encrypt= command line
- parameter, if provided.
- */
+static void parse_mem_encrypt(struct setup_header *hdr) +{
- int on = cmdline_find_option_bool("mem_encrypt=on");
- int off = cmdline_find_option_bool("mem_encrypt=off");
- if (on > off)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+}
/*
- The compressed kernel image (ZO), has been moved so that its position
- is against the end of the buffer used to hold the uncompressed kernel
@@ -387,6 +400,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG;
- parse_mem_encrypt(&boot_params->hdr);
- sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) {
This patch didn't apply on 6.6.y, so I applied it by hand, but it turns out there is no "boot_parms" on 6.6.y, so it breaks the build.
So I've dropped this one from the 6.6.y tree now, if you can submit it in a form that at least compiles, I'll take it :)
thanks,
greg k-h
On Mon, 8 Apr 2024 at 14:37, Greg KH gregkh@linuxfoundation.org wrote:
On Mon, Apr 08, 2024 at 08:49:21AM +0200, Ard Biesheuvel wrote:
From: Ard Biesheuvel ardb@kernel.org
[ Commit cd0d9d92c8bb46e77de62efd7df13069ddd61e7d upstream ]
The early SME/SEV code parses the command line very early, in order to decide whether or not memory encryption should be enabled, which needs to occur even before the initial page tables are created.
This is problematic for a number of reasons:
- this early code runs from the 1:1 mapping provided by the decompressor or firmware, which uses a different translation than the one assumed by the linker, and so the code needs to be built in a special way;
- parsing external input while the entire kernel image is still mapped writable is a bad idea in general, and really does not belong in security minded code;
- the current code ignores the built-in command line entirely (although this appears to be the case for the entire decompressor)
Given that the decompressor/EFI stub is an intrinsic part of the x86 bootable kernel image, move the command line parsing there and out of the core kernel. This removes the need to build lib/cmdline.o in a special way, or to use RIP-relative LEA instructions in inline asm blocks.
This involves a new xloadflag in the setup header to indicate that mem_encrypt=on appeared on the kernel command line.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-17-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org
arch/x86/boot/compressed/misc.c | 15 +++++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/lib/Makefile | 13 -------- arch/x86/mm/mem_encrypt_identity.c | 32 ++------------------ drivers/firmware/efi/libstub/x86-stub.c | 3 ++ 5 files changed, 22 insertions(+), 42 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f711f2a85862..c6136a1be283 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -357,6 +357,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, return entry; }
+/*
- Set the memory encryption xloadflag based on the mem_encrypt= command line
- parameter, if provided.
- */
+static void parse_mem_encrypt(struct setup_header *hdr) +{
int on = cmdline_find_option_bool("mem_encrypt=on");
int off = cmdline_find_option_bool("mem_encrypt=off");
if (on > off)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+}
/*
- The compressed kernel image (ZO), has been moved so that its position
- is against the end of the buffer used to hold the uncompressed kernel
@@ -387,6 +400,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG;
parse_mem_encrypt(&boot_params->hdr);
sanitize_boot_params(boot_params); if (boot_params->screen_info.orig_video_mode == 7) {
This patch didn't apply on 6.6.y, so I applied it by hand, but it turns out there is no "boot_parms" on 6.6.y, so it breaks the build.
If you apply it by hand, it will be called boot_params_ptr, and that indeed does not exist yet on 6.6.y. The patch accounts for that.
However, it is bizarre that this fails to apply, given that I generated the patches from a tree based on 6.6.25. Does it conflict with something else you have queued up?
So I've dropped this one from the 6.6.y tree now, if you can submit it in a form that at least compiles, I'll take it :)
Happy to do so, but I'll need another base if 6.6.25 is not sufficient.
From: Ard Biesheuvel ardb@kernel.org
[ Commit 48204aba801f1b512b3abed10b8e1a63e03f3dd1 upstream ]
The .head.text section is the initial primary entrypoint of the core kernel, and is entered with the CPU executing from a 1:1 mapping of memory. Such code must never access global variables using absolute references, as these are based on the kernel virtual mapping which is not active yet at this point.
Given that the SME startup code is also called from this early execution context, move it into .head.text as well. This will allow more thorough build time checks in the future to ensure that early startup code only uses RIP-relative references to global variables.
Also replace some occurrences of __pa_symbol() [which relies on the compiler generating an absolute reference, which is not guaranteed] and an open coded RIP-relative access with RIP_REL_REF().
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-18-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org --- arch/x86/include/asm/mem_encrypt.h | 8 ++-- arch/x86/mm/mem_encrypt_identity.c | 42 ++++++++------------ 2 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index f4f526984629..76081a34fc23 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); void __init sev_setup_arch(void);
-void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); @@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } static inline void __init sev_setup_arch(void) { }
-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { }
static inline void sev_es_init_vc_handling(void) { }
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index d210c7fc8fa2..64b5005d49e5 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -41,6 +41,7 @@ #include <linux/mem_encrypt.h> #include <linux/cc_platform.h>
+#include <asm/init.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/coco.h> @@ -94,7 +95,7 @@ struct sme_populate_pgd_data { */ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -109,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); }
-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -146,7 +147,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; }
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -162,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); }
-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -188,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); }
-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -198,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) } }
-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -208,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } }
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -232,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); }
-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); }
-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); }
-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); }
-static unsigned long __init sme_pgtable_calc(unsigned long len) +static unsigned long __head sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0;
@@ -284,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return entries + tables; }
-void __init sme_encrypt_kernel(struct boot_params *bp) +void __head sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -319,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * memory from being cached. */
- /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); + kernel_start = (unsigned long)RIP_REL_REF(_text); + kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start;
initrd_start = 0; @@ -338,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif
- /* - * We're running identity mapped, so we must obtain the address to the - * SME encryption workarea using rip-relative addressing. - */ - asm ("lea sme_workarea(%%rip), %0" - : "=r" (workarea_start) - : "p" (sme_workarea)); - /* * Calculate required number of workarea bytes needed: * executable encryption area size: @@ -355,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ - execute_start = workarea_start; + execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start;
@@ -498,7 +490,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); }
-void __init sme_enable(struct boot_params *bp) +void __head sme_enable(struct boot_params *bp) { unsigned int eax, ebx, ecx, edx; unsigned long feature_mask;
From: Ard Biesheuvel ardb@kernel.org
[ Commit 428080c9b19bfda37c478cd626dbd3851db1aff9 upstream ]
In preparation for implementing rigorous build time checks to enforce that only code that can support it will be called from the early 1:1 mapping of memory, move SEV init code that is called in this manner to the .head.text section.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Tested-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20240227151907.387873-19-ardb+git@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org --- arch/x86/boot/compressed/sev.c | 3 +++ arch/x86/include/asm/sev.h | 10 ++++----- arch/x86/kernel/sev-shared.c | 23 +++++++++----------- arch/x86/kernel/sev.c | 14 +++++++----- 4 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 80d76aea1f7b..0a49218a516a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -116,6 +116,9 @@ static bool fault_in_kernel_space(unsigned long address) #undef __init #define __init
+#undef __head +#define __head + #define __BOOT_COMPRESSED
/* Basic instruction decoding support needed */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 36f905797075..75a5388d4068 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -199,15 +199,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) struct snp_guest_request_ioctl;
void setup_ghcb(void); -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned long npages); -void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned long npages); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __init __noreturn snp_abort(void); +void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 466fe09898cc..acbec4de3ec3 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -89,7 +89,8 @@ static bool __init sev_es_check_cpu_features(void) return true; }
-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ;
@@ -326,13 +327,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); }
/* @@ -391,7 +386,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; }
-static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -528,7 +523,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -570,7 +566,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1016,7 +1012,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1043,7 +1040,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index a8db68a063c4..9905dc0e0b09 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -26,6 +26,7 @@ #include <linux/dmi.h> #include <uapi/linux/sev-guest.h>
+#include <asm/init.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/sev.h> @@ -683,8 +684,9 @@ static u64 __init get_jump_table_addr(void) return ret; }
-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, - unsigned long npages, enum psc_op op) +static void __head +early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -740,7 +742,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); }
-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2045,7 +2047,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info;
@@ -2071,7 +2073,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) return cc_info; }
-bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info;
@@ -2093,7 +2095,7 @@ bool __init snp_init(struct boot_params *bp) return true; }
-void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); }
From: Ard Biesheuvel ardb@kernel.org
[ Commit 9c55461040a9264b7e44444c53d26480b438eda6 upstream ]
Currently, the EFI stub invokes the EFI memory attributes protocol to strip any NX restrictions from the entire loaded kernel, resulting in all code and data being mapped read-write-execute.
The point of the EFI memory attributes protocol is to remove the need for all memory allocations to be mapped with both write and execute permissions by default, and make it the OS loader's responsibility to transition data mappings to code mappings where appropriate.
Even though the UEFI specification does not appear to leave room for denying memory attribute changes based on security policy, let's be cautious and avoid relying on the ability to create read-write-execute mappings. This is trivially achievable, given that the amount of kernel code executing via the firmware's 1:1 mapping is rather small and limited to the .head.text region. So let's drop the NX restrictions only on that subregion, but not before remapping it as read-only first.
Signed-off-by: Ard Biesheuvel ardb@kernel.org --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 1 + arch/x86/include/asm/boot.h | 1 + drivers/firmware/efi/libstub/x86-stub.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 71fc531b95b4..583c11664c63 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-sed-voffset := -e 's/^([0-9a-fA-F]*) [ABCDGRSTVW] (_text|__bss_start|_end)$$/#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^([0-9a-fA-F]*) [ABCDGRSTVW] (_text|__start_rodata|__bss_start|_end)$$/#define VO_\2 _AC(0x\1,UL)/p'
quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c6136a1be283..b5ecbd32a46f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,7 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; }
+const unsigned long kernel_text_size = VO___start_rodata - VO__text; const unsigned long kernel_total_size = VO__end - VO__text;
static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index b3a7cfb0d99e..c945c893c52e 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -81,6 +81,7 @@
#ifndef __ASSEMBLY__ extern unsigned int output_len; +extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 6224f5c40532..e4ae3db727ef 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -238,6 +238,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE);
if (memattr != NULL) { + status = efi_call_proto(memattr, set_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_RO); + if (status != EFI_SUCCESS) { + efi_warn("Failed to set EFI_MEMORY_RO attribute\n"); + return status; + } + status = efi_call_proto(memattr, clear_memory_attributes, rounded_start, rounded_end - rounded_start, @@ -816,7 +825,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
*kernel_entry = addr + entry;
- return efi_adjust_memory_range_protection(addr, kernel_total_size); + return efi_adjust_memory_range_protection(addr, kernel_text_size); }
static void __noreturn enter_kernel(unsigned long kernel_addr,
On Mon, Apr 08, 2024 at 08:49:18AM +0200, Ard Biesheuvel wrote:
From: Ard Biesheuvel ardb@kernel.org
Please merge the attached series into stable branches v6.6 and v6.8. They backport changes that are part of the work to harden the EFI stub and make it compatible with MS requirements on EFI memory protections on secure boot enabled systems.
Note that the first patch by Hou Wenlong is already in v6.8. The remaining ones should apply equally to v6.6 and v6.8. Only patch #5 was tweaked for context changes due to backports that overtook this one.
All now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org