This series addresses two security vulnerabilities (CVE-2023-0597 [1], CVE-2023-3640 [2]) in the x86 memory-management subsystem, together with the prerequisite patches [3] needed for clean integration into the stable trees.
[PATCH 5.10/5.15/6.1 1/5] x86/kasan: Map shadow for percpu pages on demand
    Allocates the KASAN shadow for per-CPU pages on demand instead of up front.
[PATCH 5.10/5.15/6.1 2/5] x86/mm: Recompute physical address for every page of per-CPU CEA mapping
    Recomputes the physical address for each page of the CPU entry area mapping instead of reusing the first page's address.
[PATCH 5.10/5.15/6.1 3/5] x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area
    Populates the KASAN shadow for the whole per-CPU range of the CPU entry area rather than per individual chunk.
[PATCH 5.10/5.15/6.1 4/5] x86/mm: Randomize per-cpu entry area
    Randomizes the per-CPU entry area, which was previously mapped at a predictable location irrespective of KASLR, addressing the information leak described in CVE-2023-0597 [1].
[PATCH 5.10/5.15/6.1 5/5] x86/mm: Do not shuffle CPU entry areas without KASLR
    Skips CPU entry area shuffling when KASLR is disabled, avoiding unintended non-determinism; this is the issue tracked as CVE-2023-3640 [2].
[1] https://nvd.nist.gov/vuln/detail/CVE-2023-0597
[2] https://nvd.nist.gov/vuln/detail/CVE-2023-3640
[3] https://patchwork.ozlabs.org/project/ubuntu-kernel/cover/20230903234603.8599...
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
commit 3f148f3318140035e87decc1214795ff0755757b upstream.
KASAN maps shadow for the entire CPU-entry-area: [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE]
This will explode once the per-cpu entry areas are randomized, since that will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to allocate shadow for such a big area.
Fix this by allocating KASAN shadow only for the really used cpu entry area addresses mapped by cea_map_percpu_pages().
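For reference, a standalone userspace sketch (not kernel code) of the shadow-range arithmetic the new helper performs; the scale shift and shadow offset below are the usual x86-64 values, but treat them as illustrative assumptions:

  #include <stdio.h>

  #define PAGE_SIZE                 4096UL
  #define KASAN_SHADOW_SCALE_SHIFT  3                       /* 1 shadow byte per 8 bytes */
  #define KASAN_SHADOW_OFFSET       0xdffffc0000000000UL    /* assumed, config dependent */

  static unsigned long mem_to_shadow(unsigned long addr)
  {
          return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
  }

  int main(void)
  {
          unsigned long va   = 0xfffffe0000001000UL;        /* some CPU-entry-area page */
          unsigned long size = 8 * PAGE_SIZE;               /* a small per-CPU chunk */

          /* round down/up to whole shadow pages, as the helper does */
          unsigned long start = mem_to_shadow(va) & ~(PAGE_SIZE - 1);
          unsigned long end   = (mem_to_shadow(va + size) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

          printf("shadow [%#lx, %#lx): %lu page(s)\n", start, end, (end - start) / PAGE_SIZE);
          return 0;
  }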
Thanks to the 0day folks for finding and reporting this to be an issue.
[ dhansen: tweak changelog since this will get committed before peterz's actual cpu-entry-area randomization ]
Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Cc: kernel test robot <yujie.liu@intel.com>
Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com
Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
---
 arch/x86/include/asm/kasan.h |  3 +++
 arch/x86/mm/cpu_entry_area.c |  8 +++++++-
 arch/x86/mm/kasan_init_64.c  | 15 ++++++++++++---
 3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 13e70da38beda..de75306b932ef 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -28,9 +28,12 @@
 #ifdef CONFIG_KASAN
 void __init kasan_early_init(void);
 void __init kasan_init(void);
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
 #else
 static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
+static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
+						   int nid) { }
 #endif
 
 #endif
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 6c2f1b76a0b61..d7081b1accca6 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -9,6 +9,7 @@
 #include <asm/cpu_entry_area.h>
 #include <asm/fixmap.h>
 #include <asm/desc.h>
+#include <asm/kasan.h>
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -53,8 +54,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
+	phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
+
+	kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
+					early_pfn_to_nid(PFN_DOWN(pa)));
+
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+		cea_set_pte(cea_vaddr, pa, prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 1a50434c8a4da..18d574af30efb 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -318,6 +318,18 @@ void __init kasan_early_init(void)
 	kasan_map_early_shadow(init_top_pgt);
 }
 
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
+{
+	unsigned long shadow_start, shadow_end;
+
+	shadow_start = (unsigned long)kasan_mem_to_shadow(va);
+	shadow_start = round_down(shadow_start, PAGE_SIZE);
+	shadow_end = (unsigned long)kasan_mem_to_shadow(va + size);
+	shadow_end = round_up(shadow_end, PAGE_SIZE);
+
+	kasan_populate_shadow(shadow_start, shadow_end, nid);
+}
+
 void __init kasan_init(void)
 {
 	int i;
@@ -395,9 +407,6 @@ void __init kasan_init(void)
 			      kasan_mem_to_shadow((void *)VMALLOC_END + 1),
 			      shadow_cpu_entry_begin);
 
-	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-			      (unsigned long)shadow_cpu_entry_end, 0);
-
 	kasan_populate_early_shadow(shadow_cpu_entry_end,
 			kasan_mem_to_shadow((void *)__START_KERNEL_map));
From: Sean Christopherson <seanjc@google.com>
commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 upstream.
Recompute the physical address for each per-CPU page in the CPU entry area; a recent commit inadvertently modified cea_map_percpu_pages() such that every PTE is mapped to the physical address of the first page.
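To illustrate the breakage, a standalone simulation (not kernel code): per-CPU pages are virtually contiguous but need not be physically contiguous, so hoisting the virtual-to-physical translation out of the mapping loop points every PTE at the first page's frame. The frame numbers below are made up:

  #include <stdio.h>

  #define NPAGES 4

  /* pretend translation: virtual page i of the per-CPU chunk lives in a
   * scattered physical frame (made-up values) */
  static const unsigned long fake_phys[NPAGES] = { 0x1a000, 0x7c000, 0x03000, 0x5e000 };

  int main(void)
  {
          unsigned long pa_hoisted = fake_phys[0];  /* buggy: translated once, before the loop */

          for (int i = 0; i < NPAGES; i++) {
                  unsigned long pa = fake_phys[i];  /* fixed: translated for every page */

                  printf("page %d: buggy PTE -> %#lx, correct PTE -> %#lx%s\n",
                         i, pa_hoisted, pa, pa == pa_hoisted ? "" : "  (mismatch)");
          }
          return 0;
  }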
Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com
Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
---
 arch/x86/mm/cpu_entry_area.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index d7081b1accca6..c24f33c11421b 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -60,7 +60,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 					early_pfn_to_nid(PFN_DOWN(pa)));
 
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-		cea_set_pte(cea_vaddr, pa, prot);
+		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)
From: Sean Christopherson <seanjc@google.com>
commit 97650148a15e0b30099d6175ffe278b9f55ec66a upstream.
Populate a KASAN shadow for the entire possible per-CPU range of the CPU entry area instead of requiring that each individual chunk map a shadow. Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping was left behind, which can lead to not-present page faults during KASAN validation if the kernel performs a software lookup into the GDT. The DS buffer is also likely affected.
The motivation for mapping the per-CPU areas on-demand was to avoid mapping the entire 512GiB range that's reserved for the CPU entry area; shaving a few bytes by not creating shadows for potentially unused memory was not a goal.
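A back-of-envelope sketch of the cost being discussed; the per-CPU area size used here is only an assumed stand-in for sizeof(struct cpu_entry_area), which varies by configuration:

  #include <stdio.h>

  int main(void)
  {
          unsigned long page_size = 4096;
          unsigned long cea_size  = 0x3b000;           /* assumed per-CPU entry area size */
          unsigned long shadow    = cea_size >> 3;     /* KASAN: 1 shadow byte per 8 bytes */
          unsigned long pages     = (shadow + page_size - 1) / page_size;

          printf("shadow for one CPU's entry area: %lu bytes (~%lu page(s))\n",
                 shadow, pages);
          return 0;
  }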
The bug is most easily reproduced by doing a sigreturn with a garbage CS in the sigcontext, e.g.
  int main(void)
  {
    struct sigcontext regs;

    syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);

    memset(&regs, 0, sizeof(regs));
    regs.cs = 0x1d0;
    syscall(__NR_rt_sigreturn);
    return 0;
  }
to coerce the kernel into doing a GDT lookup to compute CS.base when reading the instruction bytes on the subsequent #GP to determine whether or not the #GP is something the kernel should handle, e.g. to fixup UMIP violations or to emulate CLI/STI for IOPL=3 applications.
  BUG: unable to handle page fault for address: fffffbc8379ace00
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0
  Oops: 0000 [#1] PREEMPT SMP KASAN
  CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:kasan_check_range+0xdf/0x190
  Call Trace:
   <TASK>
   get_desc+0xb0/0x1d0
   insn_get_seg_base+0x104/0x270
   insn_fetch_from_user+0x66/0x80
   fixup_umip_exception+0xb1/0x530
   exc_general_protection+0x181/0x210
   asm_exc_general_protection+0x22/0x30
  RIP: 0003:0x0
  Code: Unable to access opcode bytes at 0xffffffffffffffd6.
  RSP: 0003:0000000000000000 EFLAGS: 00000202
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
   </TASK>
Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com
Suggested-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com
Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
---
 arch/x86/mm/cpu_entry_area.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index c24f33c11421b..5990797113c2e 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -54,11 +54,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
-	phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
-
-	kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
-					early_pfn_to_nid(PFN_DOWN(pa)));
-
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
 		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
@@ -158,6 +153,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
 	pgprot_t tss_prot = PAGE_KERNEL;
 #endif
 
+	kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
+					early_cpu_to_node(cpu));
+
 	cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
 
 	cea_map_percpu_pages(&cea->entry_stack_page,
From: Peter Zijlstra <peterz@infradead.org>
commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 upstream.
Seth found that the CPU entry area (the piece of per-cpu data that is mapped into the userspace page tables for kPTI) is not subject to any randomization, irrespective of kASLR settings.
On x86_64 a whole P4D (512 GB) of virtual address space is reserved for this structure, which is plenty large enough to randomize things a little.
As such, use a straightforward randomization scheme that avoids duplicates to spread the existing CPUs over the available space.
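A standalone sketch (not kernel code) of that duplicate-avoiding scheme: each CPU keeps drawing a random slot until it lands on one that no earlier CPU has claimed. rand() stands in for the kernel's bounded PRNG helper, and the CPU/slot counts are made up:

  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>

  #define NR_CPUS 8
  #define MAX_CEA 64   /* stand-in for (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE */

  static unsigned int cea_offset[NR_CPUS];

  int main(void)
  {
          srand((unsigned int)time(NULL));

          for (int i = 0; i < NR_CPUS; i++) {
                  unsigned int cea;
  again:
                  cea = (unsigned int)rand() % MAX_CEA;

                  /* reject slots already taken by earlier CPUs ("O(sodding terrible)") */
                  for (int j = 0; j < i; j++)
                          if (cea_offset[j] == cea)
                                  goto again;

                  cea_offset[i] = cea;
                  printf("cpu %d -> slot %u\n", i, cea);
          }
          return 0;
  }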
[ bp: Fix le build. ]
Reported-by: Seth Jenkins <sethjenkins@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
---
 arch/x86/include/asm/cpu_entry_area.h |  4 ---
 arch/x86/include/asm/pgtable_areas.h  |  8 ++++-
 arch/x86/kernel/hw_breakpoint.c       |  2 +-
 arch/x86/mm/cpu_entry_area.c          | 47 ++++++++++++++++++++++++---
 4 files changed, 51 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 75efc4c6f0766..462fc34f13176 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
index d34cce1b995cf..4f056fb88174b 100644
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE		P4D_SIZE
+#endif
 
 #endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 668a4a6533d92..bbb0f737aab19 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 
 	/* CPU entry erea is always used for CPU entry */
 	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-			CPU_ENTRY_AREA_TOTAL_SIZE))
+			CPU_ENTRY_AREA_MAP_SIZE))
 		return true;
 
 	/*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 5990797113c2e..350868f181116 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -5,6 +5,7 @@
 #include <linux/kallsyms.h>
 #include <linux/kcore.h>
 #include <linux/pgtable.h>
+#include <linux/prandom.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/fixmap.h>
@@ -16,16 +17,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
 
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	unsigned int max_cea;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		cea = prandom_u32_max(max_cea);
+
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
 	return (struct cpu_entry_area *) va;
@@ -209,7 +247,6 @@ static __init void setup_cpu_entry_area_ptes(void)
 
 	/* The +1 is for the readonly IDT: */
 	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
 	start = CPU_ENTRY_AREA_BASE;
@@ -225,6 +262,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;
 
+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();
 
 	for_each_possible_cpu(cpu)
From: Michal Koutný <mkoutny@suse.com>
commit a3f547addcaa10df5a226526bc9e2d9a94542344 upstream.
The commit 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") fixed an omission of KASLR on CPU entry areas. It doesn't take into account KASLR switches though, which may result in unintended non-determinism when a user wants to avoid it (e.g. debugging, benchmarking).
Generate only a single combination of CPU entry area offsets -- the linear array that existed prior to randomization -- when KASLR is turned off.
Since we have 3f148f331814 ("x86/kasan: Map shadow for percpu pages on demand") and followups, we can use the more relaxed guard kaslr_enabled() (in contrast to kaslr_memory_enabled()).
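A minimal sketch (not kernel code) of the behaviour this patch adds: with KASLR off, fall back to the deterministic pre-randomization layout (cpu N gets slot N) instead of drawing random offsets. The kaslr_on flag stands in for the kernel's kaslr_enabled() check:

  #include <stdbool.h>
  #include <stdio.h>

  #define NR_CPUS 4

  static unsigned int cea_offset[NR_CPUS];

  static void init_cea_offsets(bool kaslr_on)
  {
          if (!kaslr_on) {
                  /* deterministic linear layout, as before randomization existed */
                  for (int i = 0; i < NR_CPUS; i++)
                          cea_offset[i] = i;
                  return;
          }
          /* ...otherwise the duplicate-avoiding random assignment would run... */
  }

  int main(void)
  {
          init_cea_offsets(false);              /* pretend KASLR is disabled */
          for (int i = 0; i < NR_CPUS; i++)
                  printf("cpu %d -> slot %u\n", i, cea_offset[i]);
          return 0;
  }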
Fixes: 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area")
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20230306193144.24605-1-mkoutny%40suse.com
Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
---
 arch/x86/mm/cpu_entry_area.c | 7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 350868f181116..4b7c9adc14ada 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -11,6 +11,7 @@
 #include <asm/fixmap.h>
 #include <asm/desc.h>
 #include <asm/kasan.h>
+#include <asm/setup.h>
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -30,6 +31,12 @@ static __init void init_cea_offsets(void)
 	unsigned int max_cea;
 	unsigned int i, j;
 
+	if (!kaslr_enabled()) {
+		for_each_possible_cpu(i)
+			per_cpu(_cea_offset, i) = i;
+		return;
+	}
+
 	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
 
 	/* O(sodding terrible) */