4.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Andy Lutomirski luto@kernel.org
commit 40e7f949e0d9a33968ebde5d67f7e3a47c97742a upstream.
The IST stacks are needed when an IST exception occurs and are accessed before any kernel code at all runs. Move them into struct cpu_entry_area.
The IST stacks are unlike the rest of cpu_entry_area: they're used even for entries from kernel mode. This means that they should be set up before we load the final IDT. Move cpu_entry_area setup to trap_init() for the boot CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus().
Signed-off-by: Andy Lutomirski luto@kernel.org Signed-off-by: Thomas Gleixner tglx@linutronix.de Reviewed-by: Thomas Gleixner tglx@linutronix.de Reviewed-by: Borislav Petkov bp@suse.de Cc: Boris Ostrovsky boris.ostrovsky@oracle.com Cc: Borislav Petkov bp@alien8.de Cc: Borislav Petkov bpetkov@suse.de Cc: Brian Gerst brgerst@gmail.com Cc: Dave Hansen dave.hansen@intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: David Laight David.Laight@aculab.com Cc: Denys Vlasenko dvlasenk@redhat.com Cc: Eduardo Valentin eduval@amazon.com Cc: Greg KH gregkh@linuxfoundation.org Cc: H. Peter Anvin hpa@zytor.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Juergen Gross jgross@suse.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rik van Riel riel@redhat.com Cc: Will Deacon will.deacon@arm.com Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
--- arch/x86/include/asm/fixmap.h | 12 ++++++ arch/x86/kernel/cpu/common.c | 74 +++++++++++++++++++++++------------------- arch/x86/kernel/traps.c | 3 + 3 files changed, 57 insertions(+), 32 deletions(-)
--- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -63,10 +63,22 @@ struct cpu_entry_area { struct tss_struct tss;
char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif };
#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+extern void setup_cpu_entry_areas(void); + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -466,24 +466,36 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); }
-static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, - int pages, pgprot_t prot) -{ - int i; - - for (i = 0; i < pages; i++) { - __set_fixmap(fixmap_index - i, - per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); - } -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif
+#ifdef CONFIG_X86_64 +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; + +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +static void __init +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); +} + /* Setup the fixmap mappings only once per-processor */ -static inline void setup_cpu_entry_area(int cpu) +static void __init setup_cpu_entry_area(int cpu) { #ifdef CONFIG_X86_64 extern char _entry_trampoline[]; @@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area( PAGE_KERNEL);
#ifdef CONFIG_X86_32 - this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); #endif
#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, + PAGE_KERNEL); + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif }
+void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} + /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { @@ -1385,20 +1413,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count);
-/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { @@ -1607,7 +1621,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = per_cpu(exception_stacks, cpu); + char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; @@ -1633,8 +1647,6 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me);
- setup_cpu_entry_area(cpu); - /* * Initialize the TSS. sp0 points to the entry trampoline stack * regardless of what task is running. @@ -1694,8 +1706,6 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr);
- setup_cpu_entry_area(cpu); - /* * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -947,6 +947,9 @@ dotraplinkage void do_iret_error(struct
void __init trap_init(void) { + /* Init cpu_entry_area before IST entries are set up */ + setup_cpu_entry_areas(); + idt_setup_traps();
/*