On Tue, Apr 01, 2025 at 10:07:18AM -0500, Tom Lendacky wrote:
On 4/1/25 02:57, Kirill Shutemov wrote:
On Mon, Mar 31, 2025 at 04:14:40PM -0700, Dan Williams wrote:
Nikolay reports [1] that accessing BIOS data (first 1MB of the physical address space) via /dev/mem results in an SEPT violation.
The cause is ioremap() (via xlate_dev_mem_ptr()) establishing an unencrypted mapping where the kernel had established an encrypted mapping previously.
Teach __ioremap_check_other() that this address space shall always be mapped as encrypted as historically it is memory resident data, not MMIO with side-effects.
I am not sure if all AMD platforms would survive that.
Tom?
I haven't tested this, yet, but with SME the BIOS is not encrypted, so that would need an unencrypted mapping.
Could you qualify your mapping with a TDX check? Or can you do something in the /dev/mem support to map appropriately?
I'm adding @Naveen since he is preparing a patch to prevent /dev/mem from accessing ROM areas under SNP as those can trigger #VC for a page that is mapped encrypted but has not been validated. He's looking at possibly adding something to x86_platform_ops that can be overridden. The application would get a bad return code vs an exception.
The thought with x86_platform_ops was that TDX may want to differ and setup separate ranges to deny access to. For SEV-SNP, we primarily want to disallow the video ROM range at this point. Something like the below.
If this is not something TDX wants, then we should be able to add a check for SNP in devmem_is_allowed() directly without the x86_platform_ops.
- Naveen
---
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 583df2c6a2e3..fa9f23200ee4 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -761,6 +761,18 @@ static u64 __init get_jump_table_addr(void)
 	return ret;
 }
 
+bool sev_snp_mem_access_allowed(unsigned long pfn)
+{
+	/*
+	 * Reject access to ROM address range (0xc0000 to 0xfffff) for SEV-SNP guests
+	 * as that address range is not validated, so access can cause #VC exception
+	 */
+	if (pfn >= 0xc0 && pfn <= 0xff)
+		return 0;
+
+	return 1;
+}
+
 static void __head
 early_set_pages_state(unsigned long vaddr, unsigned long paddr,
 		      unsigned long npages, enum psc_op op)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ba7999f66abe..f94522da9eb5 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -454,6 +454,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
 struct snp_guest_request_ioctl;
 
 void setup_ghcb(void);
+bool sev_snp_mem_access_allowed(unsigned long pfn);
 void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
 				  unsigned long npages);
 void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
@@ -496,6 +497,7 @@ static inline void sev_enable(struct boot_params *bp) { }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
 static inline void setup_ghcb(void) { }
+static inline bool sev_snp_mem_access_allowed(unsigned long pfn) { return true; }
 static inline void __init
 early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
 static inline void __init
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 36698cc9fb44..0add7878e413 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -307,6 +307,7 @@ struct x86_hyper_runtime {
  * @realmode_reserve:		reserve memory for realmode trampoline
  * @realmode_init:		initialize realmode trampoline
  * @hyper:			x86 hypervisor specific runtime callbacks
+ * @mem_access_allowed:	filter accesses to pfn
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_cpu)(void);
@@ -324,6 +325,7 @@ struct x86_platform_ops {
 	void (*set_legacy_features)(void);
 	void (*realmode_reserve)(void);
 	void (*realmode_init)(void);
+	bool (*mem_access_allowed)(unsigned long pfn);
 	struct x86_hyper_runtime hyper;
 	struct x86_guest guest;
 };
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0a2bbd674a6d..83217de27b46 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -142,6 +142,7 @@ static bool enc_cache_flush_required_noop(void) { return false; }
 static void enc_kexec_begin_noop(void) {}
 static void enc_kexec_finish_noop(void) {}
 static bool is_private_mmio_noop(u64 addr) {return false; }
+static bool mem_access_allowed_noop(unsigned long pfn) { return true; }
 
 struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu_early,
@@ -156,6 +157,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.restore_sched_clock_state	= tsc_restore_sched_clock_state,
 	.realmode_reserve		= reserve_real_mode,
 	.realmode_init			= init_real_mode,
+	.mem_access_allowed		= mem_access_allowed_noop,
 	.hyper.pin_vcpu			= x86_op_int_noop,
 	.hyper.is_private_mmio		= is_private_mmio_noop,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bfa444a7dbb0..64750d710f9f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -867,6 +867,9 @@ void __init poking_init(void)
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
+	if (!x86_platform.mem_access_allowed(pagenr))
+		return 0;
+
 	if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE,
 			IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
 				!= REGION_DISJOINT) {
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 7490ff6d83b1..75e2d86cdab9 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -532,6 +532,7 @@ void __init sme_early_init(void)
 	 * parsing has happened.
 	 */
 	x86_init.resources.dmi_setup = snp_dmi_setup;
+	x86_platform.mem_access_allowed = sev_snp_mem_access_allowed;
 }
 
 /*