This is a note to let you know that I've just added the patch titled
x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code
to the 4.9-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=su...
The filename of the patch is:
     x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree, please let stable@vger.kernel.org know about it.
From ce4a4e565f5264909a18c733b864c3f74467f69e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 28 May 2017 10:00:14 -0700
Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code
From: Andy Lutomirski <luto@kernel.org>
commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
The UP asm/tlbflush.h generates somewhat nicer code than the SMP version. Aside from that, it's fallen quite a bit behind the SMP code:
- flush_tlb_mm_range() didn't flush individual pages if the range was small.
- The lazy TLB code was much weaker. This usually wouldn't matter, but, if a kernel thread flushed its lazy "active_mm" more than once (due to reclaim or similar), it wouldn't be unlazied and would instead pointlessly flush repeatedly.
- Tracepoints were missing.
Aside from that, simply having the UP code around was a maintenance burden, since it meant that any change to the TLB flush code had to take care not to break it.
Simplify everything by deleting the UP code.
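For readers who want to see what the lazy-TLB bullet above means in practice, here is a small stand-alone C sketch. It is not kernel code: the state names mirror cpu_tlbstate.state, TLBSTATE_OK/TLBSTATE_LAZY and leave_mm() from the patch below, but the cpu_tlbstate_model structure, leave_mm_model() and flush_request() helpers are made up for this illustration and deliberately simplified. It shows how the formerly-SMP logic un-lazies a CPU on the first flush request so later flushes can skip it, instead of flushing the lazy CPU repeatedly as the removed UP code would:

#include <stdio.h>
#include <stdbool.h>

/* Simplified, single-CPU model of the per-CPU TLB state kept in cpu_tlbstate. */
enum tlb_state { TLBSTATE_OK, TLBSTATE_LAZY };

struct cpu_tlbstate_model {
        enum tlb_state state;
        int active_mm;          /* stand-in for struct mm_struct * */
        bool in_mm_cpumask;     /* is this CPU still in mm_cpumask(active_mm)? */
};

static struct cpu_tlbstate_model cpu = {
        .state = TLBSTATE_LAZY, /* e.g. a kernel thread after enter_lazy_tlb() */
        .active_mm = 1,
        .in_mm_cpumask = true,
};

/* Roughly what leave_mm() achieves: drop out of the mm's cpumask so that
 * future flushes of that mm no longer target this CPU at all. */
static void leave_mm_model(void)
{
        cpu.in_mm_cpumask = false;
        printf("CPU left mm %d; later flushes of that mm skip it\n", cpu.active_mm);
}

/* A flush request arriving for 'mm' while this CPU may be lazy. */
static void flush_request(int mm)
{
        if (!cpu.in_mm_cpumask || cpu.active_mm != mm) {
                printf("flush of mm %d: nothing to do on this CPU\n", mm);
                return;
        }
        if (cpu.state == TLBSTATE_LAZY) {
                /* SMP-style behaviour: un-lazy once instead of flushing forever. */
                leave_mm_model();
                return;
        }
        printf("flush of mm %d: local TLB flushed\n", mm);
}

int main(void)
{
        flush_request(1);       /* first flush: CPU un-lazies (leave_mm) */
        flush_request(1);       /* second flush: skipped, no pointless work */
        return 0;
}

The removed UP headers kept no such state, which is why a lazy kernel thread there would keep flushing on every request instead of being un-lazied once.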
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Eduardo Valentin <eduval@amazon.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig                   |    2
 arch/x86/include/asm/hardirq.h     |    2
 arch/x86/include/asm/mmu.h         |    6 --
 arch/x86/include/asm/mmu_context.h |    2
 arch/x86/include/asm/tlbbatch.h    |    2
 arch/x86/include/asm/tlbflush.h    |   81 ------------------------------------
 arch/x86/mm/init.c                 |    2
 arch/x86/mm/tlb.c                  |   17 -------
 8 files changed, 5 insertions(+), 109 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,7 +45,7 @@ config X86
         select ARCH_USE_CMPXCHG_LOCKREF if X86_64
         select ARCH_USE_QUEUED_RWLOCKS
         select ARCH_USE_QUEUED_SPINLOCKS
-        select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+        select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
         select ARCH_WANTS_DYNAMIC_TASK_STRUCT
         select ARCH_WANT_FRAME_POINTERS
         select ARCH_WANT_IPC_PARSE_VERSION if X86_32
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,8 +22,8 @@ typedef struct {
 #ifdef CONFIG_SMP
         unsigned int irq_resched_count;
         unsigned int irq_call_count;
-        unsigned int irq_tlb_count;
 #endif
+        unsigned int irq_tlb_count;
 #ifdef CONFIG_X86_THERMAL_VECTOR
         unsigned int irq_thermal_count;
 #endif
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -33,12 +33,6 @@ typedef struct {
 #endif
 } mm_context_t;
-#ifdef CONFIG_SMP
 void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif

 #endif /* _ASM_X86_MMU_H */
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
-#ifdef CONFIG_SMP
         if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                 this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
 }

 static inline int init_new_context(struct task_struct *tsk,
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -3,7 +3,6 @@
 #include <linux/cpumask.h>

-#ifdef CONFIG_SMP
 struct arch_tlbflush_unmap_batch {
         /*
          * Each bit set is a CPU that potentially has a TLB entry for one of
@@ -11,6 +10,5 @@ struct arch_tlbflush_unmap_batch {
          */
         struct cpumask cpumask;
 };
-#endif

 #endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,6 +7,7 @@
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
+#include <asm/smp.h>
 static inline void __invpcid(unsigned long pcid, unsigned long addr,
                              unsigned long type)
@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_non
 #endif

 struct tlb_state {
-#ifdef CONFIG_SMP
         struct mm_struct *active_mm;
         int state;
-#endif

         /*
          * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -222,82 +221,6 @@ struct flush_tlb_info {
         unsigned long end;
 };
-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions. *Not* intended to be called
- * directly. All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
-        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-        __flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
-        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-        __flush_tlb_all();
-}
-
-static inline void flush_tlb(void)
-{
-        __flush_tlb_up();
-}
-
-static inline void local_flush_tlb(void)
-{
-        __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-        if (mm == current->active_mm)
-                __flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-                                  unsigned long addr)
-{
-        if (vma->vm_mm == current->active_mm)
-                __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-                                   unsigned long start, unsigned long end)
-{
-        if (vma->vm_mm == current->active_mm)
-                __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
-           unsigned long start, unsigned long end, unsigned long vmflag)
-{
-        if (mm == current->active_mm)
-                __flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
-                                           const struct flush_tlb_info *info)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
-                                          unsigned long end)
-{
-        flush_tlb_all();
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
-
 #define local_flush_tlb() __flush_tlb()
 #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)

@@ -337,8 +260,6 @@ static inline void arch_tlbbatch_add_mm(

 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

-#endif /* SMP */
-
 #ifndef CONFIG_PARAVIRT
 #define flush_tlb_others(mask, info) \
         native_flush_tlb_others(mask, info)
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -764,10 +764,8 @@ void __init zone_sizes_init(void)
 }
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-#ifdef CONFIG_SMP
         .active_mm = &init_mm,
         .state = 0,
-#endif
         .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
 };
 EXPORT_SYMBOL_GPL(cpu_tlbstate);
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -15,7 +15,7 @@
 #include <linux/debugfs.h>
 /*
- *      Smarter SMP flushing macros.
+ *      TLB flushing, formerly SMP-only
  *              c/o Linus Torvalds.
  *
  *      These mean you can really definitely utterly forget about
@@ -28,8 +28,6 @@
  *      Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */

-#ifdef CONFIG_SMP
-
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -53,8 +51,6 @@ void leave_mm(int cpu)
 }
 EXPORT_SYMBOL_GPL(leave_mm);

-#endif /* CONFIG_SMP */
-
 void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                struct task_struct *tsk)
 {
@@ -85,10 +81,8 @@ void switch_mm_irqs_off(struct mm_struct
                 set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
         }
-#ifdef CONFIG_SMP
         this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
         this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif

         cpumask_set_cpu(cpu, mm_cpumask(next));

@@ -146,9 +140,7 @@ void switch_mm_irqs_off(struct mm_struct
                 if (unlikely(prev->context.ldt != next->context.ldt))
                         load_mm_ldt(next);
 #endif
-        }
-#ifdef CONFIG_SMP
-        else {
+        } else {
                 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                 BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);

@@ -175,11 +167,8 @@ void switch_mm_irqs_off(struct mm_struct
                         load_mm_ldt(next);
                 }
         }
-#endif
 }

-#ifdef CONFIG_SMP
-
 /*
  * The flush IPI assumes that a thread switch happens in this order:
  * [cpu0: the cpu that switches]
@@ -459,5 +448,3 @@ static int __init create_tlb_single_page
         return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
-
-#endif /* CONFIG_SMP */
Patches currently in stable-queue which might be from luto@kernel.org are
queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch
queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch
queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch
queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch
queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch
queue-4.9/x86-kvm-vmx-simplify-segment_base.patch
queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch
queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch
queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch
queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch
queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch