February 2018 - Linux-stable-mirror

Patch "powerpc/mm/radix: Split linear mapping on hot-unplug" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled powerpc/mm/radix: Split linear mapping on hot-unplug to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-mm-radix-split-linear-mapping-on-hot-unplug.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From 4dd5f8a99e791a8c6500e3592f3ce81ae7edcde1 Mon Sep 17 00:00:00 2001 From: Balbir Singh <bsingharora(a)gmail.com> Date: Wed, 7 Feb 2018 17:35:51 +1100 Subject: powerpc/mm/radix: Split linear mapping on hot-unplug From: Balbir Singh <bsingharora(a)gmail.com> commit 4dd5f8a99e791a8c6500e3592f3ce81ae7edcde1 upstream. This patch splits the linear mapping if the hot-unplug range is smaller than the mapping size. The code detects if the mapping needs to be split into a smaller size and if so, uses the stop machine infrastructure to clear the existing mapping and then remap the remaining range using a smaller page size. The code will skip any region of the mapping that overlaps with kernel text and warn about it once. We don't want to remove a mapping where the kernel text and the LMB we intend to remove overlap in the same TLB mapping as it may affect the currently executing code. I've tested these changes under a kvm guest with 2 vcpus, from a split mapping point of view, some of the caveats mentioned above applied to the testing I did. 
Fixes: 4b5d62ca17a1 ("powerpc/mm: add radix__remove_section_mapping()") Signed-off-by: Balbir Singh <bsingharora(a)gmail.com> [mpe: Tweak change log to match updated behaviour] Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/powerpc/mm/pgtable-radix.c | 95 +++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 21 deletions(-) --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,6 +17,7 @@ #include <linux/of_fdt.h> #include <linux/mm.h> #include <linux/string_helpers.h> +#include <linux/stop_machine.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -671,6 +672,30 @@ static void free_pmd_table(pmd_t *pmd_st pud_clear(pud); } +struct change_mapping_params { + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long aligned_start; + unsigned long aligned_end; +}; + +static int stop_machine_change_mapping(void *data) +{ + struct change_mapping_params *params = + (struct change_mapping_params *)data; + + if (!data) + return -1; + + spin_unlock(&init_mm.page_table_lock); + pte_clear(&init_mm, params->aligned_start, params->pte); + create_physical_mapping(params->aligned_start, params->start); + create_physical_mapping(params->end, params->aligned_end); + spin_lock(&init_mm.page_table_lock); + return 0; +} + static void remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end) { @@ -699,6 +724,52 @@ static void remove_pte_table(pte_t *pte_ } } +/* + * clear the pte and potentially split the mapping helper + */ +static void split_kernel_mapping(unsigned long addr, unsigned long end, + unsigned long size, pte_t *pte) +{ + unsigned long mask = ~(size - 1); + unsigned long aligned_start = addr & mask; + unsigned long aligned_end = addr + size; + struct change_mapping_params params; + bool split_region = false; + + if ((end - addr) < size) { + /* + * We're going to clear the PTE, but not flushed + * the mapping, 
time to remap and flush. The + * effects if visible outside the processor or + * if we are running in code close to the + * mapping we cleared, we are in trouble. + */ + if (overlaps_kernel_text(aligned_start, addr) || + overlaps_kernel_text(end, aligned_end)) { + /* + * Hack, just return, don't pte_clear + */ + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " + "text, not splitting\n", addr, end); + return; + } + split_region = true; + } + + if (split_region) { + params.pte = pte; + params.start = addr; + params.end = end; + params.aligned_start = addr & ~(size - 1); + params.aligned_end = min_t(unsigned long, aligned_end, + (unsigned long)__va(memblock_end_of_DRAM())); + stop_machine(stop_machine_change_mapping, &params, NULL); + return; + } + + pte_clear(&init_mm, addr, pte); +} + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { @@ -714,13 +785,7 @@ static void remove_pmd_table(pmd_t *pmd_ continue; if (pmd_huge(*pmd)) { - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pmd); + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -745,13 +810,7 @@ static void remove_pud_table(pud_t *pud_ continue; if (pud_huge(*pud)) { - if (!IS_ALIGNED(addr, PUD_SIZE) || - !IS_ALIGNED(next, PUD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pud); + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -777,13 +836,7 @@ static void remove_pagetable(unsigned lo continue; if (pgd_huge(*pgd)) { - if (!IS_ALIGNED(addr, PGDIR_SIZE) || - !IS_ALIGNED(next, PGDIR_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pgd); + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } Patches currently in stable-queue which might be from 
bsingharora(a)gmail.com are queue-4.14/powerpc-radix-remove-trace_tlbie-call-from-radix__flush_tlb_all.patch queue-4.14/powerpc-mm-radix-split-linear-mapping-on-hot-unplug.patch

6 years, 10 months

1
0
0 0

Patch "KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From 928a4c39484281f8ca366f53a1db79330d058401 Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw(a)amazon.co.uk> Date: Sat, 10 Feb 2018 23:39:24 +0000 Subject: KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: David Woodhouse <dwmw(a)amazon.co.uk> commit 928a4c39484281f8ca366f53a1db79330d058401 upstream. With retpoline, tight loops of "call this function for every XXX" are very much pessimised by taking a prediction miss *every* time. This one is by far the biggest contributor to the guest launch time with retpoline. By marking the iterator slot_handle_…() functions always_inline, we can ensure that the indirect function call can be optimised away into a direct call and it actually generates slightly smaller code because some of the other conditionals can get optimised away too. Performance is now pretty close to what we see with nospectre_v2 on the command line. 
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org> Tested-by: Filippo Sironi <sironi(a)amazon.de> Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Reviewed-by: Filippo Sironi <sironi(a)amazon.de> Acked-by: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Arjan van de Ven <arjan(a)linux.intel.com> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: David Woodhouse <dwmw2(a)infradead.org> Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe(a)redhat.com> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: arjan.van.de.ven(a)intel.com Cc: dave.hansen(a)intel.com Cc: jmattson(a)google.com Cc: karahmed(a)amazon.de Cc: kvm(a)vger.kernel.org Cc: rkrcmar(a)redhat.com Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5063,7 +5063,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. 
*/ -static bool +static __always_inline bool slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) @@ -5093,7 +5093,7 @@ slot_handle_level_range(struct kvm *kvm, return flush; } -static bool +static __always_inline bool slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool lock_flush_tlb) @@ -5104,7 +5104,7 @@ slot_handle_level(struct kvm *kvm, struc lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5112,7 +5112,7 @@ slot_handle_all_level(struct kvm *kvm, s PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5120,7 +5120,7 @@ slot_handle_large_level(struct kvm *kvm, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are queue-4.14/kvm-nvmx-set-the-cpu_based_use_msr_bitmaps-if-we-have-a-valid-l02-msr-bitmap.patch queue-4.14/x86-nvmx-properly-set-spec_ctrl-and-pred_cmd-before-merging-msrs.patch queue-4.14/x86-speculation-update-speculation-control-microcode-blacklist.patch queue-4.14/x86-speculation-correct-speculation-control-microcode-blacklist-again.patch queue-4.14/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch queue-4.14/x86-speculation-clean-up-various-spectre-related-details.patch queue-4.14/revert-x86-speculation-simplify-indirect_branch_prediction_barrier.patch

6 years, 10 months

1
0
0 0

Patch "KVM/nVMX: Set the CPU_BASED_USE_MSR_BITMAPS if we have a valid L02 MSR bitmap" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM/nVMX: Set the CPU_BASED_USE_MSR_BITMAPS if we have a valid L02 MSR bitmap to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: kvm-nvmx-set-the-cpu_based_use_msr_bitmaps-if-we-have-a-valid-l02-msr-bitmap.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From 3712caeb14dcb33fb4d5114f14c0beef10aca101 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed <karahmed(a)amazon.de> Date: Sat, 10 Feb 2018 23:39:26 +0000 Subject: KVM/nVMX: Set the CPU_BASED_USE_MSR_BITMAPS if we have a valid L02 MSR bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: KarimAllah Ahmed <karahmed(a)amazon.de> commit 3712caeb14dcb33fb4d5114f14c0beef10aca101 upstream. We either clear the CPU_BASED_USE_MSR_BITMAPS and end up intercepting all MSR accesses or create a valid L02 MSR bitmap and use that. This decision has to be made every time we evaluate whether we are going to generate the L02 MSR bitmap. Before commit: d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL") ... this was probably OK since the decision was always identical. This is no longer the case now since the MSR bitmap might actually change once we decide to not intercept SPEC_CTRL and PRED_CMD. 
Signed-off-by: KarimAllah Ahmed <karahmed(a)amazon.de> Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Acked-by: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Arjan van de Ven <arjan(a)linux.intel.com> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: David Woodhouse <dwmw2(a)infradead.org> Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe(a)redhat.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Radim Krčmář <rkrcmar(a)redhat.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: arjan.van.de.ven(a)intel.com Cc: dave.hansen(a)intel.com Cc: jmattson(a)google.com Cc: kvm(a)vger.kernel.org Cc: sironi(a)amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10127,7 +10127,8 @@ static void nested_get_vmcs12_pages(stru if (cpu_has_vmx_msr_bitmap() && nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) - ; + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_USE_MSR_BITMAPS); else vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_USE_MSR_BITMAPS); Patches currently in stable-queue which might be from karahmed(a)amazon.de are queue-4.14/kvm-nvmx-set-the-cpu_based_use_msr_bitmaps-if-we-have-a-valid-l02-msr-bitmap.patch queue-4.14/x86-nvmx-properly-set-spec_ctrl-and-pred_cmd-before-merging-msrs.patch queue-4.14/x86-speculation-update-speculation-control-microcode-blacklist.patch queue-4.14/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch 
queue-4.14/revert-x86-speculation-simplify-indirect_branch_prediction_barrier.patch

6 years, 10 months

1
0
0 0

Patch "crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: crypto-sun4i_ss_prng-convert-lock-to-_bh-in-sun4i_ss_prng_generate.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From 2e7d1d61ea6c0f1c4da5eb82cafac750d55637a7 Mon Sep 17 00:00:00 2001 From: Artem Savkov <artem.savkov(a)gmail.com> Date: Tue, 6 Feb 2018 22:20:22 +0100 Subject: crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate From: Artem Savkov <artem.savkov(a)gmail.com> commit 2e7d1d61ea6c0f1c4da5eb82cafac750d55637a7 upstream. Lockdep detects a possible deadlock in sun4i_ss_prng_generate() and throws an "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage" warning. Disabling softirqs to fix this. 
Fixes: b8ae5c7387ad ("crypto: sun4i-ss - support the Security System PRNG") Signed-off-by: Artem Savkov <artem.savkov(a)gmail.com> Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/crypto/sunxi-ss/sun4i-ss-prng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c @@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng); ss = algt->ss; - spin_lock(&ss->slock); + spin_lock_bh(&ss->slock); writel(mode, ss->base + SS_CTL); @@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto } writel(0, ss->base + SS_CTL); - spin_unlock(&ss->slock); + spin_unlock_bh(&ss->slock); return 0; } Patches currently in stable-queue which might be from artem.savkov(a)gmail.com are queue-4.14/crypto-sun4i_ss_prng-convert-lock-to-_bh-in-sun4i_ss_prng_generate.patch queue-4.14/crypto-sun4i_ss_prng-fix-return-value-of-sun4i_ss_prng_generate.patch

6 years, 10 months

1
0
0 0

Patch "crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: crypto-sun4i_ss_prng-fix-return-value-of-sun4i_ss_prng_generate.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From dd78c832ffaf86eb6434e56de4bc3bc31f03f771 Mon Sep 17 00:00:00 2001 From: Artem Savkov <artem.savkov(a)gmail.com> Date: Tue, 6 Feb 2018 22:20:21 +0100 Subject: crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate From: Artem Savkov <artem.savkov(a)gmail.com> commit dd78c832ffaf86eb6434e56de4bc3bc31f03f771 upstream. According to crypto/rng.h generate function should return 0 on success and < 0 on error. Fixes: b8ae5c7387ad ("crypto: sun4i-ss - support the Security System PRNG") Signed-off-by: Artem Savkov <artem.savkov(a)gmail.com> Acked-by: Corentin Labbe <clabbe.montjoie(a)gmail.com> Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/crypto/sunxi-ss/sun4i-ss-prng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c @@ -52,5 +52,5 @@ int sun4i_ss_prng_generate(struct crypto writel(0, ss->base + SS_CTL); spin_unlock(&ss->slock); - return dlen; + return 0; } Patches currently in stable-queue which might be from artem.savkov(a)gmail.com are queue-4.14/crypto-sun4i_ss_prng-convert-lock-to-_bh-in-sun4i_ss_prng_generate.patch queue-4.14/crypto-sun4i_ss_prng-fix-return-value-of-sun4i_ss_prng_generate.patch

6 years, 10 months

1
0
0 0

Patch "compiler-gcc.h: Introduce __optimize function attribute" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled compiler-gcc.h: Introduce __optimize function attribute to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: compiler-gcc.h-introduce-__optimize-function-attribute.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From df5d45aa08f848b79caf395211b222790534ccc7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert(a)linux-m68k.org> Date: Thu, 1 Feb 2018 11:21:58 +0100 Subject: compiler-gcc.h: Introduce __optimize function attribute From: Geert Uytterhoeven <geert(a)linux-m68k.org> commit df5d45aa08f848b79caf395211b222790534ccc7 upstream. Create a new function attribute __optimize, which allows to specify an optimization level on a per-function basis. Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org> Acked-by: Ard Biesheuvel <ard.biesheuvel(a)linaro.org> Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- include/linux/compiler-gcc.h | 4 ++++ include/linux/compiler.h | 4 ++++ 2 files changed, 8 insertions(+) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -196,6 +196,10 @@ #endif /* __CHECKER__ */ #endif /* GCC_VERSION >= 40300 */ +#if GCC_VERSION >= 40400 +#define __optimize(level) __attribute__((__optimize__(level))) +#endif /* GCC_VERSION >= 40400 */ + #if GCC_VERSION >= 40500 #ifndef __CHECKER__ --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ static __always_inline void __write_once #endif /* __ASSEMBLY__ */ +#ifndef __optimize +# define __optimize(level) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 Patches currently in stable-queue 
which might be from geert(a)linux-m68k.org are queue-4.14/compiler-gcc.h-__nostackprotector-needs-gcc-4.4-and-up.patch queue-4.14/compiler-gcc.h-introduce-__optimize-function-attribute.patch

6 years, 10 months

1
0
0 0

Patch "compiler-gcc.h: __nostackprotector needs gcc-4.4 and up" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled compiler-gcc.h: __nostackprotector needs gcc-4.4 and up to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: compiler-gcc.h-__nostackprotector-needs-gcc-4.4-and-up.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. From d9afaaa4ff7af8b87d4a205e48cb8a6f666d7f01 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert(a)linux-m68k.org> Date: Thu, 1 Feb 2018 11:21:59 +0100 Subject: compiler-gcc.h: __nostackprotector needs gcc-4.4 and up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Geert Uytterhoeven <geert(a)linux-m68k.org> commit d9afaaa4ff7af8b87d4a205e48cb8a6f666d7f01 upstream. Gcc versions before 4.4 do not recognize the __optimize__ compiler attribute: warning: ‘__optimize__’ attribute directive ignored Fixes: 7375ae3a0b79ea07 ("compiler-gcc.h: Introduce __nostackprotector function attribute") Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org> Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- include/linux/compiler-gcc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -167,8 +167,6 @@ #if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) - -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) #endif #if GCC_VERSION >= 40300 @@ -198,6 +196,7 @@ #if GCC_VERSION >= 40400 #define __optimize(level) __attribute__((__optimize__(level))) +#define __nostackprotector __optimize("no-stack-protector") #endif /* GCC_VERSION >= 40400 */ #if GCC_VERSION >= 40500 Patches currently in stable-queue which 
might be from geert(a)linux-m68k.org are queue-4.14/compiler-gcc.h-__nostackprotector-needs-gcc-4.4-and-up.patch queue-4.14/compiler-gcc.h-introduce-__optimize-function-attribute.patch

6 years, 10 months

1
0
0 0

FAILED: patch "[PATCH] crypto: sha3-generic - deal with oversize stack frames" failed to apply to 4.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 4767b9ad7d762876a5865a06465e13e139a01b6b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel(a)linaro.org> Date: Sat, 27 Jan 2018 09:18:32 +0000 Subject: [PATCH] crypto: sha3-generic - deal with oversize stack frames As reported by kbuild test robot, the optimized SHA3 C implementation compiles to mn10300 code that uses a disproportionate amount of stack space, i.e., crypto/sha3_generic.c: In function 'keccakf': crypto/sha3_generic.c:147:1: warning: the frame size of 1232 bytes is larger than 1024 bytes [-Wframe-larger-than=] As kindly diagnosed by Arnd, this does not only occur when building for the mn10300 architecture (which is what the report was about) but also for h8300, and builds for other 32-bit architectures show an increase in stack space utilization as well. Given that SHA3 operates on 64-bit quantities, and keeps a state matrix of 25 64-bit words, it is not surprising that 32-bit architectures with few general purpose registers are impacted the most by this, and it is therefore reasonable to implement a workaround that distinguishes between 32-bit and 64-bit architectures. Arnd figured out that taking the round calculation out of the loop, and inlining it explicitly but only on 64-bit architectures preserves most of the performance gain achieved by the rewrite, and also gets rid of the excessive use of stack space. 
Reported-by: kbuild test robot <fengguang.wu(a)intel.com> Suggested-by: Arnd Bergmann <arnd(a)arndb.de> Signed-off-by: Ard Biesheuvel <ard.biesheuvel(a)linaro.org> Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au> diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index a965b9d80559..951c4eb70262 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -20,6 +20,20 @@ #include <crypto/sha3.h> #include <asm/unaligned.h> +/* + * On some 32-bit architectures (mn10300 and h8300), GCC ends up using + * over 1 KB of stack if we inline the round calculation into the loop + * in keccakf(). On the other hand, on 64-bit architectures with plenty + * of [64-bit wide] general purpose registers, not inlining it severely + * hurts performance. So let's use 64-bitness as a heuristic to decide + * whether to inline or not. + */ +#ifdef CONFIG_64BIT +#define SHA3_INLINE inline +#else +#define SHA3_INLINE noinline +#endif + #define KECCAK_ROUNDS 24 static const u64 keccakf_rndc[24] = { @@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = { /* update the state with given number of rounds */ -static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) +static SHA3_INLINE void keccakf_round(u64 st[25]) { u64 t[5], tt, bc[5]; - int round; - for (round = 0; round < KECCAK_ROUNDS; round++) { + /* Theta */ + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + + t[0] = bc[4] ^ rol64(bc[1], 1); + t[1] = bc[0] ^ rol64(bc[2], 1); + t[2] = bc[1] ^ rol64(bc[3], 1); + t[3] = bc[2] ^ rol64(bc[4], 1); + t[4] = bc[3] ^ rol64(bc[0], 1); + + st[0] ^= t[0]; + + /* Rho Pi */ + tt = st[1]; + st[ 1] = rol64(st[ 6] ^ t[1], 44); + st[ 6] = rol64(st[ 9] ^ t[4], 20); + st[ 9] = rol64(st[22] ^ t[2], 61); + st[22] = rol64(st[14] ^ t[4], 39); + st[14] = rol64(st[20] ^ 
t[0], 18); + st[20] = rol64(st[ 2] ^ t[2], 62); + st[ 2] = rol64(st[12] ^ t[2], 43); + st[12] = rol64(st[13] ^ t[3], 25); + st[13] = rol64(st[19] ^ t[4], 8); + st[19] = rol64(st[23] ^ t[3], 56); + st[23] = rol64(st[15] ^ t[0], 41); + st[15] = rol64(st[ 4] ^ t[4], 27); + st[ 4] = rol64(st[24] ^ t[4], 14); + st[24] = rol64(st[21] ^ t[1], 2); + st[21] = rol64(st[ 8] ^ t[3], 55); + st[ 8] = rol64(st[16] ^ t[1], 45); + st[16] = rol64(st[ 5] ^ t[0], 36); + st[ 5] = rol64(st[ 3] ^ t[3], 28); + st[ 3] = rol64(st[18] ^ t[3], 21); + st[18] = rol64(st[17] ^ t[2], 15); + st[17] = rol64(st[11] ^ t[1], 10); + st[11] = rol64(st[ 7] ^ t[2], 6); + st[ 7] = rol64(st[10] ^ t[0], 3); + st[10] = rol64( tt ^ t[1], 1); + + /* Chi */ + bc[ 0] = ~st[ 1] & st[ 2]; + bc[ 1] = ~st[ 2] & st[ 3]; + bc[ 2] = ~st[ 3] & st[ 4]; + bc[ 3] = ~st[ 4] & st[ 0]; + bc[ 4] = ~st[ 0] & st[ 1]; + st[ 0] ^= bc[ 0]; + st[ 1] ^= bc[ 1]; + st[ 2] ^= bc[ 2]; + st[ 3] ^= bc[ 3]; + st[ 4] ^= bc[ 4]; + + bc[ 0] = ~st[ 6] & st[ 7]; + bc[ 1] = ~st[ 7] & st[ 8]; + bc[ 2] = ~st[ 8] & st[ 9]; + bc[ 3] = ~st[ 9] & st[ 5]; + bc[ 4] = ~st[ 5] & st[ 6]; + st[ 5] ^= bc[ 0]; + st[ 6] ^= bc[ 1]; + st[ 7] ^= bc[ 2]; + st[ 8] ^= bc[ 3]; + st[ 9] ^= bc[ 4]; + + bc[ 0] = ~st[11] & st[12]; + bc[ 1] = ~st[12] & st[13]; + bc[ 2] = ~st[13] & st[14]; + bc[ 3] = ~st[14] & st[10]; + bc[ 4] = ~st[10] & st[11]; + st[10] ^= bc[ 0]; + st[11] ^= bc[ 1]; + st[12] ^= bc[ 2]; + st[13] ^= bc[ 3]; + st[14] ^= bc[ 4]; + + bc[ 0] = ~st[16] & st[17]; + bc[ 1] = ~st[17] & st[18]; + bc[ 2] = ~st[18] & st[19]; + bc[ 3] = ~st[19] & st[15]; + bc[ 4] = ~st[15] & st[16]; + st[15] ^= bc[ 0]; + st[16] ^= bc[ 1]; + st[17] ^= bc[ 2]; + st[18] ^= bc[ 3]; + st[19] ^= bc[ 4]; + + bc[ 0] = ~st[21] & st[22]; + bc[ 1] = ~st[22] & st[23]; + bc[ 2] = ~st[23] & st[24]; + bc[ 3] = ~st[24] & st[20]; + bc[ 4] = ~st[20] & st[21]; + st[20] ^= bc[ 0]; + st[21] ^= bc[ 1]; + st[22] ^= bc[ 2]; + st[23] ^= bc[ 3]; + st[24] ^= bc[ 4]; +} - /* Theta */ - bc[0] = st[0] ^ st[5] ^ 
st[10] ^ st[15] ^ st[20]; - bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; - bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; - bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; - bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; - - t[0] = bc[4] ^ rol64(bc[1], 1); - t[1] = bc[0] ^ rol64(bc[2], 1); - t[2] = bc[1] ^ rol64(bc[3], 1); - t[3] = bc[2] ^ rol64(bc[4], 1); - t[4] = bc[3] ^ rol64(bc[0], 1); - - st[0] ^= t[0]; - - /* Rho Pi */ - tt = st[1]; - st[ 1] = rol64(st[ 6] ^ t[1], 44); - st[ 6] = rol64(st[ 9] ^ t[4], 20); - st[ 9] = rol64(st[22] ^ t[2], 61); - st[22] = rol64(st[14] ^ t[4], 39); - st[14] = rol64(st[20] ^ t[0], 18); - st[20] = rol64(st[ 2] ^ t[2], 62); - st[ 2] = rol64(st[12] ^ t[2], 43); - st[12] = rol64(st[13] ^ t[3], 25); - st[13] = rol64(st[19] ^ t[4], 8); - st[19] = rol64(st[23] ^ t[3], 56); - st[23] = rol64(st[15] ^ t[0], 41); - st[15] = rol64(st[ 4] ^ t[4], 27); - st[ 4] = rol64(st[24] ^ t[4], 14); - st[24] = rol64(st[21] ^ t[1], 2); - st[21] = rol64(st[ 8] ^ t[3], 55); - st[ 8] = rol64(st[16] ^ t[1], 45); - st[16] = rol64(st[ 5] ^ t[0], 36); - st[ 5] = rol64(st[ 3] ^ t[3], 28); - st[ 3] = rol64(st[18] ^ t[3], 21); - st[18] = rol64(st[17] ^ t[2], 15); - st[17] = rol64(st[11] ^ t[1], 10); - st[11] = rol64(st[ 7] ^ t[2], 6); - st[ 7] = rol64(st[10] ^ t[0], 3); - st[10] = rol64( tt ^ t[1], 1); - - /* Chi */ - bc[ 0] = ~st[ 1] & st[ 2]; - bc[ 1] = ~st[ 2] & st[ 3]; - bc[ 2] = ~st[ 3] & st[ 4]; - bc[ 3] = ~st[ 4] & st[ 0]; - bc[ 4] = ~st[ 0] & st[ 1]; - st[ 0] ^= bc[ 0]; - st[ 1] ^= bc[ 1]; - st[ 2] ^= bc[ 2]; - st[ 3] ^= bc[ 3]; - st[ 4] ^= bc[ 4]; - - bc[ 0] = ~st[ 6] & st[ 7]; - bc[ 1] = ~st[ 7] & st[ 8]; - bc[ 2] = ~st[ 8] & st[ 9]; - bc[ 3] = ~st[ 9] & st[ 5]; - bc[ 4] = ~st[ 5] & st[ 6]; - st[ 5] ^= bc[ 0]; - st[ 6] ^= bc[ 1]; - st[ 7] ^= bc[ 2]; - st[ 8] ^= bc[ 3]; - st[ 9] ^= bc[ 4]; - - bc[ 0] = ~st[11] & st[12]; - bc[ 1] = ~st[12] & st[13]; - bc[ 2] = ~st[13] & st[14]; - bc[ 3] = ~st[14] & st[10]; - bc[ 4] = ~st[10] & st[11]; - st[10] 
^= bc[ 0]; - st[11] ^= bc[ 1]; - st[12] ^= bc[ 2]; - st[13] ^= bc[ 3]; - st[14] ^= bc[ 4]; - - bc[ 0] = ~st[16] & st[17]; - bc[ 1] = ~st[17] & st[18]; - bc[ 2] = ~st[18] & st[19]; - bc[ 3] = ~st[19] & st[15]; - bc[ 4] = ~st[15] & st[16]; - st[15] ^= bc[ 0]; - st[16] ^= bc[ 1]; - st[17] ^= bc[ 2]; - st[18] ^= bc[ 3]; - st[19] ^= bc[ 4]; - - bc[ 0] = ~st[21] & st[22]; - bc[ 1] = ~st[22] & st[23]; - bc[ 2] = ~st[23] & st[24]; - bc[ 3] = ~st[24] & st[20]; - bc[ 4] = ~st[20] & st[21]; - st[20] ^= bc[ 0]; - st[21] ^= bc[ 1]; - st[22] ^= bc[ 2]; - st[23] ^= bc[ 3]; - st[24] ^= bc[ 4]; +static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) +{ + int round; + for (round = 0; round < KECCAK_ROUNDS; round++) { + keccakf_round(st); /* Iota */ st[0] ^= keccakf_rndc[round]; }

6 years, 10 months

1
0
0 0

FAILED: patch "[PATCH] x86/entry/64: Clear extra registers beyond syscall arguments, to reduce speculation attack surface" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 8e1eb3fa009aa7c0b944b3c8b26b07de0efb3200 Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams(a)intel.com> Date: Mon, 5 Feb 2018 17:18:05 -0800 Subject: [PATCH] x86/entry/64: Clear extra registers beyond syscall arguments, to reduce speculation attack surface At entry userspace may have (maliciously) populated the extra registers outside the syscall calling convention with arbitrary values that could be useful in a speculative execution (Spectre style) attack. Clear these registers to minimize the kernel's attack surface. Note, this only clears the extra registers and not the unused registers for syscalls less than 6 arguments, since those registers are likely to be clobbered well before their values could be put to use under speculation. Note, Linus found that the XOR instructions can be executed with minimized cost if interleaved with the PUSH instructions, and Ingo's analysis found that R10 and R11 should be included in the register clearing beyond the typical 'extra' syscall calling convention registers. Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org> Reported-by: Andi Kleen <ak(a)linux.intel.com> Signed-off-by: Dan Williams <dan.j.williams(a)intel.com> Cc: <stable(a)vger.kernel.org> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Brian Gerst <brgerst(a)gmail.com> Cc: Denys Vlasenko <dvlasenk(a)redhat.com> Cc: H. 
Peter Anvin <hpa(a)zytor.com> Cc: Josh Poimboeuf <jpoimboe(a)redhat.com> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: http://lkml.kernel.org/r/151787988577.7847.16733592218894189003.stgit@dwill… [ Made small improvements to the changelog and the code comments. ] Signed-off-by: Ingo Molnar <mingo(a)kernel.org> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c752abe89d80..065a71b90808 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -235,13 +235,26 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %r8 /* pt_regs->r8 */ pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ + /* + * Clear extra registers that a speculation attack might + * otherwise want to exploit. Interleave XOR with PUSH + * for better uop scheduling: + */ + xorq %r10, %r10 /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15 */ UNWIND_HINT_REGS TRACE_IRQS_OFF

6 years, 10 months

1
0
0 0

[PATCH 14/30] crypto: x86/cast5-avx - fix ECB encryption when long sg follows short one

by Eric Biggers

From: Eric Biggers <ebiggers(a)google.com> With ecb-cast5-avx, if a 128+ byte scatterlist element followed a shorter one, then the algorithm accidentally encrypted/decrypted only 8 bytes instead of the expected 128 bytes. Fix it by setting the encryption/decryption 'fn' correctly. Fixes: c12ab20b162c ("crypto: cast5/avx - avoid using temporary stack buffers") Cc: <stable(a)vger.kernel.org> # v3.8+ Signed-off-by: Eric Biggers <ebiggers(a)google.com> --- arch/x86/crypto/cast5_avx_glue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index dbea6020ffe7d..575292a33bdf2 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -66,8 +66,6 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); int err; - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - err = blkcipher_walk_virt(desc, walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; @@ -79,6 +77,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; do { fn(ctx, wdst, wsrc); -- 2.16.2

6 years, 10 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror February 2018