February 2018 - Linux-stable-mirror

[Linux-stable-mirror] Patch "KVM: nVMX: Eliminate vmcs02 pool" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM: nVMX: Eliminate vmcs02 pool to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVM_nVMX_Eliminate_vmcs02_pool.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM: nVMX: Eliminate vmcs02 pool From: Jim Mattson jmattson(a)google.com Date: Mon Nov 27 17:22:25 2017 -0600 From: Jim Mattson jmattson(a)google.com commit de3a0021a60635de96aa92713c1a31a96747d72c The potential performance advantages of a vmcs02 pool have never been realized. To simplify the code, eliminate the pool. Instead, a single vmcs02 is allocated per VCPU when the VCPU enters VMX operation. Cc: stable(a)vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Jim Mattson <jmattson(a)google.com> Signed-off-by: Mark Kanda <mark.kanda(a)oracle.com> Reviewed-by: Ameya More <ameya.more(a)oracle.com> Reviewed-by: David Hildenbrand <david(a)redhat.com> Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com> Signed-off-by: Radim Krčmář <rkrcmar(a)redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/vmx.c | 146 ++++++++--------------------------------------------- 1 file changed, 23 insertions(+), 123 deletions(-) --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -182,7 +182,6 @@ module_param(ple_window_max, int, S_IRUG extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -223,7 +222,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -406,13 +405,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -437,15 +429,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -6964,94 +6956,6 @@ static int handle_monitor(struct kvm_vcp } /* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - -/* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified * by Vol 2B, VMX Instruction Reference, "Conventions". @@ -7232,6 +7136,12 @@ static int enter_vmx_operation(struct kv struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + vmx->nested.vmcs02.vmcs = alloc_vmcs(); + vmx->nested.vmcs02.shadow_vmcs = NULL; + if (!vmx->nested.vmcs02.vmcs) + goto out_vmcs02; + loaded_vmcs_init(&vmx->nested.vmcs02); + if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); @@ -7254,9 +7164,6 @@ static int enter_vmx_operation(struct kv vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7271,6 +7178,9 @@ out_cached_vmcs12: free_page((unsigned long)vmx->nested.msr_bitmap); out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: return -ENOMEM; } @@ -7423,7 +7333,7 @@ static void free_nested(struct vcpu_vmx vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7439,7 +7349,7 @@ static void free_nested(struct vcpu_vmx vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7482,8 +7392,6 @@ static int handle_vmclear(struct kvm_vcp vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } @@ -8395,10 +8303,11 @@ static bool nested_vmx_exit_reflected(st /* * The host physical addresses of some pages of guest memory - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU - * may write to these pages via their host physical address while - * L2 is running, bypassing any address-translation-based dirty - * tracking (e.g. EPT write protection). + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. @@ -10894,20 +10803,15 @@ static int enter_vmx_non_root_mode(struc { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct loaded_vmcs *vmcs02; u32 msr_entry_idx; u32 exit_qual; - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - vmx_switch_vmcs(vcpu, vmcs02); + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { @@ -11522,10 +11426,6 @@ static void nested_vmx_vmexit(struct kvm vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); Patches currently in stable-queue which might be from jmattson(a)google.com are queue-4.14/x86kvm_Update_spectre-v1_mitigation.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVM_nVMX_Eliminate_vmcs02_pool.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/KVM_VMX_make_MSR_bitmaps_per-VCPU.patch queue-4.14/KVMx86_Update_the_reverse_cpuid_list_to_include_CPUID_7_EDX.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "KVM: VMX: introduce alloc_loaded_vmcs" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM: VMX: introduce alloc_loaded_vmcs to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVM_VMX_introduce_alloc_loaded_vmcs.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM: VMX: introduce alloc_loaded_vmcs From: Paolo Bonzini pbonzini(a)redhat.com Date: Thu Jan 11 12:16:15 2018 +0100 From: Paolo Bonzini pbonzini(a)redhat.com commit f21f165ef922c2146cc5bdc620f542953c41714b Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also allocate an MSR bitmap there. Cc: stable(a)vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/vmx.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3814,11 +3814,6 @@ static struct vmcs *alloc_vmcs_cpu(int c return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3837,6 +3832,22 @@ static void free_loaded_vmcs(struct load WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + return 0; +} + static void free_kvm_area(void) { int cpu; @@ -7135,12 +7146,11 @@ static int enter_vmx_operation(struct kv { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + int r; - vmx->nested.vmcs02.vmcs = alloc_vmcs(); - vmx->nested.vmcs02.shadow_vmcs = NULL; - if (!vmx->nested.vmcs02.vmcs) + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) goto out_vmcs02; - loaded_vmcs_init(&vmx->nested.vmcs02); if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = @@ -9535,13 +9545,11 @@ static struct kvm_vcpu *vmx_create_vcpu( if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) + err = alloc_loaded_vmcs(&vmx->vmcs01); + if (err < 0) goto free_msrs; - loaded_vmcs_init(vmx->loaded_vmcs); + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; Patches currently in stable-queue which might be from pbonzini(a)redhat.com are queue-4.14/x86pti_Do_not_enable_PTI_on_CPUs_which_are_not_vulnerable_to_Meltdown.patch queue-4.14/x86kvm_Update_spectre-v1_mitigation.patch queue-4.14/x86speculation_Use_Indirect_Branch_Prediction_Barrier_in_context_switch.patch queue-4.14/KVM_VMX_introduce_alloc_loaded_vmcs.patch queue-4.14/x86cpufeature_Blacklist_SPEC_CTRLPRED_CMD_on_early_Spectre_v2_microcodes.patch queue-4.14/x86cpufeatures_Add_Intel_feature_bits_for_Speculation_Control.patch queue-4.14/x86paravirt_Remove_noreplace-paravirt_cmdline_option.patch queue-4.14/KVM_VMX_Make_indirect_call_speculation_safe.patch queue-4.14/x86msr_Add_definitions_for_new_speculation_control_MSRs.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/x86cpufeatures_Add_CPUID_7_EDX_CPUID_leaf.patch queue-4.14/KVM_nVMX_Eliminate_vmcs02_pool.patch queue-4.14/x86cpufeatures_Add_AMD_feature_bits_for_Speculation_Control.patch queue-4.14/KVMSVM_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVMx86_Add_IBPB_support.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/x86speculation_Add_basic_IBPB_(Indirect_Branch_Prediction_Barrier)_support.patch queue-4.14/x86speculation_Simplify_indirect_branch_prediction_barrier().patch queue-4.14/KVM_VMX_make_MSR_bitmaps_per-VCPU.patch queue-4.14/KVM_x86_Make_indirect_calls_in_emulator_speculation_safe.patch queue-4.14/x86retpoline_Simplify_vmexit_fill_RSB().patch queue-4.14/x86cpufeatures_Clean_up_Spectre_v2_related_CPUID_flags.patch queue-4.14/KVMx86_Update_the_reverse_cpuid_list_to_include_CPUID_7_EDX.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES From: KarimAllah Ahmed karahmed(a)amazon.de Date: Thu Feb 1 22:59:44 2018 +0100 From: KarimAllah Ahmed karahmed(a)amazon.de commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the contents will come directly from the hardware, but user-space can still override it. [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] Signed-off-by: KarimAllah Ahmed <karahmed(a)amazon.de> Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com> Reviewed-by: Darren Kenny <darren.kenny(a)oracle.com> Reviewed-by: Jim Mattson <jmattson(a)google.com> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Andi Kleen <ak(a)linux.intel.com> Cc: Jun Nakajima <jun.nakajima(a)intel.com> Cc: kvm(a)vger.kernel.org Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Asit Mallick <asit.k.mallick(a)intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven(a)intel.com> Cc: Greg KH <gregkh(a)linuxfoundation.org> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Tim Chen <tim.c.chen(a)linux.intel.com> Cc: Ashok Raj <ashok.raj(a)intel.com> Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon… Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 15 +++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -394,7 +394,7 @@ static inline int __do_cpuid_ent(struct /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -583,6 +583,8 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif + u64 arch_capabilities; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; u32 secondary_exec_control; @@ -3257,6 +3259,12 @@ static int vmx_get_msr(struct kvm_vcpu * case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3392,6 +3400,11 @@ static int vmx_set_msr(struct kvm_vcpu * vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5652,6 +5665,8 @@ static int vmx_vcpu_setup(struct vcpu_vm ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1006,6 +1006,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; Patches currently in stable-queue which might be from karahmed(a)amazon.de are queue-4.14/x86spectre_Simplify_spectre_v2_command_line_parsing.patch queue-4.14/x86pti_Do_not_enable_PTI_on_CPUs_which_are_not_vulnerable_to_Meltdown.patch queue-4.14/x86speculation_Use_Indirect_Branch_Prediction_Barrier_in_context_switch.patch queue-4.14/x86cpuid_Fix_up_virtual_IBRSIBPBSTIBP_feature_bits_on_Intel.patch queue-4.14/x86cpufeature_Blacklist_SPEC_CTRLPRED_CMD_on_early_Spectre_v2_microcodes.patch queue-4.14/x86cpufeatures_Add_Intel_feature_bits_for_Speculation_Control.patch queue-4.14/x86msr_Add_definitions_for_new_speculation_control_MSRs.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/x86cpufeatures_Add_CPUID_7_EDX_CPUID_leaf.patch queue-4.14/x86cpufeatures_Add_AMD_feature_bits_for_Speculation_Control.patch queue-4.14/KVMSVM_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVMx86_Add_IBPB_support.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/x86speculation_Add_basic_IBPB_(Indirect_Branch_Prediction_Barrier)_support.patch queue-4.14/x86speculation_Simplify_indirect_branch_prediction_barrier().patch queue-4.14/x86retpoline_Simplify_vmexit_fill_RSB().patch queue-4.14/x86cpufeatures_Clean_up_Spectre_v2_related_CPUID_flags.patch queue-4.14/KVMx86_Update_the_reverse_cpuid_list_to_include_CPUID_7_EDX.patch queue-4.14/x86retpoline_Avoid_retpolines_for_built-in___init_functions.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "KVM: VMX: make MSR bitmaps per-VCPU" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM: VMX: make MSR bitmaps per-VCPU to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVM_VMX_make_MSR_bitmaps_per-VCPU.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM: VMX: make MSR bitmaps per-VCPU From: Paolo Bonzini pbonzini(a)redhat.com Date: Tue Jan 16 16:51:18 2018 +0100 From: Paolo Bonzini pbonzini(a)redhat.com commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6 Place the MSR bitmap in struct loaded_vmcs, and update it in place every time the x2apic or APICv state can change. This is rare and the loop can handle 64 MSRs per iteration, in a similar fashion as nested_vmx_prepare_msr_bitmap. This prepares for choosing, on a per-VM basis, whether to intercept the SPEC_CTRL and PRED_CMD MSRs. Cc: stable(a)vger.kernel.org # prereq for Spectre mitigation Suggested-by: Jim Mattson <jmattson(a)google.com> Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/vmx.c | 276 ++++++++++++++++++++++++++++------------------------- 1 file changed, 150 insertions(+), 126 deletions(-) --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -108,6 +108,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -206,6 +214,7 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -446,8 +455,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -562,6 +569,7 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -919,6 +927,7 @@ static bool vmx_get_nmi_mask(struct kvm_ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -938,12 +947,6 @@ static DEFINE_PER_CPU(spinlock_t, blocke enum { VMX_IO_BITMAP_A, VMX_IO_BITMAP_B, - VMX_MSR_BITMAP_LEGACY, - VMX_MSR_BITMAP_LONGMODE, - VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, - VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, - VMX_MSR_BITMAP_LEGACY_X2APIC, - VMX_MSR_BITMAP_LONGMODE_X2APIC, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR @@ -953,12 +956,6 @@ static unsigned long *vmx_bitmap[VMX_BIT #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) -#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) -#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) -#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) -#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) -#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) -#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) @@ -2559,36 +2556,6 @@ static void move_msr_up(struct vcpu_vmx vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2629,7 +2596,7 @@ static void setup_msrs(struct vcpu_vmx * vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -3829,6 +3796,8 @@ static void free_loaded_vmcs(struct load loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } @@ -3845,7 +3814,18 @@ static int alloc_loaded_vmcs(struct load loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4920,10 +4900,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4957,6 +4935,50 @@ static void __vmx_disable_intercept_for_ } } +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __set_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __set_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __set_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __set_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -5003,28 +5025,68 @@ static void nested_vmx_disable_intercept } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); -} - -static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) -{ - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, - msr, type); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, type); + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; +} + +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) +{ + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } + } +} + +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) @@ -5272,7 +5334,7 @@ static void vmx_refresh_apicv_exec_ctrl( } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5459,7 +5521,7 @@ static int vmx_vcpu_setup(struct vcpu_vm vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -6742,7 +6804,7 @@ void vmx_enable_tdp(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6763,9 +6825,6 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; @@ -6828,42 +6887,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - for (msr = 0x800; msr <= 0x8ff; msr++) { - if (msr == 0x839 /* TMCCT */) - continue; - vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); - } - - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); - - /* EOI */ - vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); - if (enable_ept) vmx_enable_tdp(); else @@ -7152,13 +7177,6 @@ static int enter_vmx_operation(struct kv if (r < 0) goto out_vmcs02; - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; @@ -7185,9 +7203,6 @@ out_shadow_vmcs: kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); - -out_msr_bitmap: free_loaded_vmcs(&vmx->nested.vmcs02); out_vmcs02: @@ -7332,10 +7347,6 @@ static void free_nested(struct vcpu_vmx free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); @@ -8851,7 +8862,7 @@ static void vmx_set_virtual_x2apic_mode( } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -9513,6 +9524,7 @@ static struct kvm_vcpu *vmx_create_vcpu( { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9549,6 +9561,15 @@ static struct kvm_vcpu *vmx_create_vcpu( if (err < 0) goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); @@ -10018,7 +10039,7 @@ static inline bool nested_vmx_merge_msr_ int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) @@ -10595,6 +10616,9 @@ static int prepare_vmcs02(struct kvm_vcp if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -11388,7 +11412,7 @@ static void load_vmcs12_host_state(struc vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) Patches currently in stable-queue which might be from jmattson(a)google.com are queue-4.14/x86kvm_Update_spectre-v1_mitigation.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVM_nVMX_Eliminate_vmcs02_pool.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/KVM_VMX_make_MSR_bitmaps_per-VCPU.patch queue-4.14/KVMx86_Update_the_reverse_cpuid_list_to_include_CPUID_7_EDX.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL From: KarimAllah Ahmed karahmed(a)amazon.de Date: Thu Feb 1 22:59:45 2018 +0100 From: KarimAllah Ahmed karahmed(a)amazon.de commit d28b387fb74da95d69d2615732f50cceb38e9a4d [ Based on a patch from Ashok Raj <ashok.raj(a)intel.com> ] Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero is written to it. No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest. [dwmw2: Clean up CPUID bits, save/restore manually, handle reset] Signed-off-by: KarimAllah Ahmed <karahmed(a)amazon.de> Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Darren Kenny <darren.kenny(a)oracle.com> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com> Reviewed-by: Jim Mattson <jmattson(a)google.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Andi Kleen <ak(a)linux.intel.com> Cc: Jun Nakajima <jun.nakajima(a)intel.com> Cc: kvm(a)vger.kernel.org Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: Tim Chen <tim.c.chen(a)linux.intel.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Asit Mallick <asit.k.mallick(a)intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven(a)intel.com> Cc: Greg KH <gregkh(a)linuxfoundation.org> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Ashok Raj <ashok.raj(a)intel.com> Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon… Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/cpuid.c | 9 ++-- arch/x86/kvm/vmx.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 2 3 files changed, 110 insertions(+), 6 deletions(-) --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB); + F(IBPB) | F(IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -394,7 +394,8 @@ static inline int __do_cpuid_ent(struct /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -630,9 +631,11 @@ static inline int __do_cpuid_ent(struct g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBPB isn't necessarily present in hardware cpuid */ + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ if (boot_cpu_has(X86_FEATURE_IBPB)) entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -584,6 +584,7 @@ struct vcpu_vmx { #endif u64 arch_capabilities; + u64 spec_ctrl; u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; @@ -1906,6 +1907,29 @@ static void update_exception_bitmap(stru } /* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +/* * Check if MSR is intercepted for L01 MSR bitmap. */ static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) @@ -3259,6 +3283,14 @@ static int vmx_get_msr(struct kvm_vcpu * case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; case MSR_IA32_ARCH_CAPABILITIES: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) @@ -3372,6 +3404,37 @@ static int vmx_set_msr(struct kvm_vcpu * case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && @@ -5697,6 +5760,7 @@ static void vmx_vcpu_reset(struct kvm_vc u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -9360,6 +9424,15 @@ static void __noclone vmx_vcpu_run(struc vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -9478,6 +9551,27 @@ static void __noclone vmx_vcpu_run(struc #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -10109,7 +10203,7 @@ static inline bool nested_vmx_merge_msr_ unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* - * pred_cmd is trying to verify two things: + * pred_cmd & spec_ctrl are trying to verify two things: * * 1. L0 gave a permission to L1 to actually passthrough the MSR. This * ensures that we do not accidentally generate an L02 MSR bitmap @@ -10122,9 +10216,10 @@ static inline bool nested_vmx_merge_msr_ * the MSR. */ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && - !pred_cmd) + !pred_cmd && !spec_ctrl) return false; page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); @@ -10158,6 +10253,12 @@ static inline bool nested_vmx_merge_msr_ } } + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + if (pred_cmd) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1006,7 +1006,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_ARCH_CAPABILITIES + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; Patches currently in stable-queue which might be from ashok.raj(a)intel.com are queue-4.14/x86pti_Do_not_enable_PTI_on_CPUs_which_are_not_vulnerable_to_Meltdown.patch queue-4.14/x86cpufeature_Blacklist_SPEC_CTRLPRED_CMD_on_early_Spectre_v2_microcodes.patch queue-4.14/x86cpufeatures_Add_Intel_feature_bits_for_Speculation_Control.patch queue-4.14/x86paravirt_Remove_noreplace-paravirt_cmdline_option.patch queue-4.14/KVM_VMX_Make_indirect_call_speculation_safe.patch queue-4.14/x86msr_Add_definitions_for_new_speculation_control_MSRs.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/x86cpufeatures_Add_CPUID_7_EDX_CPUID_leaf.patch queue-4.14/x86cpufeatures_Add_AMD_feature_bits_for_Speculation_Control.patch queue-4.14/KVMSVM_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVMx86_Add_IBPB_support.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/x86speculation_Add_basic_IBPB_(Indirect_Branch_Prediction_Barrier)_support.patch queue-4.14/KVM_x86_Make_indirect_calls_in_emulator_speculation_safe.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: KVMSVM_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL From: KarimAllah Ahmed karahmed(a)amazon.de Date: Sat Feb 3 15:56:23 2018 +0100 From: KarimAllah Ahmed karahmed(a)amazon.de commit b2ac58f90540e39324e7a29a7ad471407ae0bf48 [ Based on a patch from Paolo Bonzini <pbonzini(a)redhat.com> ] ... basically doing exactly what we do for VMX: - Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) - Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest actually used it. Signed-off-by: KarimAllah Ahmed <karahmed(a)amazon.de> Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Darren Kenny <darren.kenny(a)oracle.com> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Andi Kleen <ak(a)linux.intel.com> Cc: Jun Nakajima <jun.nakajima(a)intel.com> Cc: kvm(a)vger.kernel.org Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: Tim Chen <tim.c.chen(a)linux.intel.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Asit Mallick <asit.k.mallick(a)intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven(a)intel.com> Cc: Greg KH <gregkh(a)linuxfoundation.org> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Ashok Raj <ashok.raj(a)intel.com> Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon… Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- arch/x86/kvm/svm.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -184,6 +184,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -249,6 +251,7 @@ static const struct svm_direct_access_ms { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, @@ -882,6 +885,25 @@ static bool valid_msr_intercept(u32 inde return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1587,6 +1609,8 @@ static void svm_vcpu_reset(struct kvm_vc u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -3591,6 +3615,13 @@ static int svm_get_msr(struct kvm_vcpu * case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3682,6 +3713,33 @@ static int svm_set_msr(struct kvm_vcpu * case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) @@ -4950,6 +5008,15 @@ static void svm_vcpu_run(struct kvm_vcpu local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5042,6 +5109,27 @@ static void svm_vcpu_run(struct kvm_vcpu #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); Patches currently in stable-queue which might be from pbonzini(a)redhat.com are queue-4.14/x86pti_Do_not_enable_PTI_on_CPUs_which_are_not_vulnerable_to_Meltdown.patch queue-4.14/x86kvm_Update_spectre-v1_mitigation.patch queue-4.14/x86speculation_Use_Indirect_Branch_Prediction_Barrier_in_context_switch.patch queue-4.14/KVM_VMX_introduce_alloc_loaded_vmcs.patch queue-4.14/x86cpufeature_Blacklist_SPEC_CTRLPRED_CMD_on_early_Spectre_v2_microcodes.patch queue-4.14/x86cpufeatures_Add_Intel_feature_bits_for_Speculation_Control.patch queue-4.14/x86paravirt_Remove_noreplace-paravirt_cmdline_option.patch queue-4.14/KVM_VMX_Make_indirect_call_speculation_safe.patch queue-4.14/x86msr_Add_definitions_for_new_speculation_control_MSRs.patch queue-4.14/KVMVMX_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/x86cpufeatures_Add_CPUID_7_EDX_CPUID_leaf.patch queue-4.14/KVM_nVMX_Eliminate_vmcs02_pool.patch queue-4.14/x86cpufeatures_Add_AMD_feature_bits_for_Speculation_Control.patch queue-4.14/KVMSVM_Allow_direct_access_to_MSR_IA32_SPEC_CTRL.patch queue-4.14/KVMx86_Add_IBPB_support.patch queue-4.14/KVMVMX_Emulate_MSR_IA32_ARCH_CAPABILITIES.patch queue-4.14/x86speculation_Add_basic_IBPB_(Indirect_Branch_Prediction_Barrier)_support.patch queue-4.14/x86speculation_Simplify_indirect_branch_prediction_barrier().patch queue-4.14/KVM_VMX_make_MSR_bitmaps_per-VCPU.patch queue-4.14/KVM_x86_Make_indirect_calls_in_emulator_speculation_safe.patch queue-4.14/x86retpoline_Simplify_vmexit_fill_RSB().patch queue-4.14/x86cpufeatures_Clean_up_Spectre_v2_related_CPUID_flags.patch queue-4.14/KVMx86_Update_the_reverse_cpuid_list_to_include_CPUID_7_EDX.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] Patch "Documentation: Document array_index_nospec" has been added to the 4.14-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled Documentation: Document array_index_nospec to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: Documentation_Document_array_index_nospec.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. Subject: Documentation: Document array_index_nospec From: Mark Rutland mark.rutland(a)arm.com Date: Mon Jan 29 17:02:16 2018 -0800 From: Mark Rutland mark.rutland(a)arm.com commit f84a56f73dddaeac1dba8045b007f742f61cd2da Document the rationale and usage of the new array_index_nospec() helper. Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Signed-off-by: Will Deacon <will.deacon(a)arm.com> Signed-off-by: Dan Williams <dan.j.williams(a)intel.com> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Kees Cook <keescook(a)chromium.org> Cc: linux-arch(a)vger.kernel.org Cc: Jonathan Corbet <corbet(a)lwn.net> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: gregkh(a)linuxfoundation.org Cc: kernel-hardening(a)lists.openwall.com Cc: torvalds(a)linux-foundation.org Cc: alan(a)linux.intel.com Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwi… Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- Documentation/speculation.txt | 90 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) --- /dev/null +++ b/Documentation/speculation.txt @@ -0,0 +1,90 @@ +This document explains potential effects of speculation, and how undesirable +effects can be mitigated portably using common APIs. + +=========== +Speculation +=========== + +To improve performance and minimize average latencies, many contemporary CPUs +employ speculative execution techniques such as branch prediction, performing +work which may be discarded at a later stage. + +Typically speculative execution cannot be observed from architectural state, +such as the contents of registers. However, in some cases it is possible to +observe its impact on microarchitectural state, such as the presence or +absence of data in caches. Such state may form side-channels which can be +observed to extract secret information. + +For example, in the presence of branch prediction, it is possible for bounds +checks to be ignored by code which is speculatively executed. Consider the +following code: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else + return array[index]; + } + +Which, on arm64, may be compiled to an assembly sequence such as: + + CMP <index>, #MAX_ARRAY_ELEMS + B.LT less + MOV <returnval>, #0 + RET + less: + LDR <returnval>, [<array>, <index>] + RET + +It is possible that a CPU mis-predicts the conditional branch, and +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This +value will subsequently be discarded, but the speculated load may affect +microarchitectural state which can be subsequently measured. + +More complex sequences involving multiple dependent memory accesses may +result in sensitive information being leaked. Consider the following +code, building on the prior example: + + int load_dependent_arrays(int *arr1, int *arr2, int index) + { + int val1, val2, + + val1 = load_array(arr1, index); + val2 = load_array(arr2, val1); + + return val2; + } + +Under speculation, the first call to load_array() may return the value +of an out-of-bounds address, while the second call will influence +microarchitectural state dependent on this value. This may provide an +arbitrary read primitive. + +==================================== +Mitigating speculation side-channels +==================================== + +The kernel provides a generic API to ensure that bounds checks are +respected even under speculation. Architectures which are affected by +speculation-based side-channels are expected to implement these +primitives. + +The array_index_nospec() helper in <linux/nospec.h> can be used to +prevent information from being leaked via side-channels. + +A call to array_index_nospec(index, size) returns a sanitized index +value that is bounded to [0, size) even under cpu speculation +conditions. + +This can be used to protect the earlier load_array() example: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else { + index = array_index_nospec(index, MAX_ARRAY_ELEMS); + return array[index]; + } + } Patches currently in stable-queue which might be from mark.rutland(a)arm.com are queue-4.14/Documentation_Document_array_index_nospec.patch

7 years, 8 months

1
0
0 0

[Linux-stable-mirror] [PATCH 4.9] kaiser: allocate pgd with order 0 when pti=off

by Hugh Dickins

The 4.9.77 version of "x86/pti/efi: broken conversion from efi to kernel page table" looked nicer than the 4.4.112 version, but was suboptimal on machines booted with "pti=off" (or on AMD machines): it allocated pgd with an order 1 page whatever the setting of kaiser_enabled. Fix that by moving the definition of PGD_ALLOCATION_ORDER from asm/pgalloc.h to asm/pgtable.h, which already defines kaiser_enabled. Fixes: 1b92c48a2eeb ("x86/pti/efi: broken conversion from efi to kernel page table") Cc: Pavel Tatashin <pasha.tatashin(a)oracle.com> Cc: Steven Sistare <steven.sistare(a)oracle.com> Cc: Jiri Kosina <jkosina(a)suse.cz> Cc: stable(a)vger.kernel.org Signed-off-by: Hugh Dickins <hughd(a)google.com> --- arch/x86/include/asm/pgalloc.h | 11 ----------- arch/x86/include/asm/pgtable.h | 6 ++++++ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 1178a51b77f3..b6d425999f99 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif - /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2536f90cd30c..5af0401ccff2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -20,9 +20,15 @@ #ifdef CONFIG_PAGE_TABLE_ISOLATION extern int kaiser_enabled; +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ #else #define kaiser_enabled 0 #endif +#define PGD_ALLOCATION_ORDER kaiser_enabled void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); -- 2.16.0.rc1.238.g530d649a79-goog

7 years, 8 months

4
3
0 0

[Linux-stable-mirror] regression in igb driver from commit 182785335447

by Michal Kubecek

one of my colleagues observed a regression in recent 4.4.x kernels on one of test machines with 82575EB NIC (rev 02, 8086:10a7, firmware version 1.6.5). On boot, first port fails to initialize and only the net device for second is created. Kernel log looks like [ 13.710535] igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k [ 13.710538] igb: Copyright (c) 2007-2014 Intel Corporation. [ 13.710584] igb 0000:08:00.0: PCI->APIC IRQ transform: INT A -> IRQ 56 [ 13.712126] igb: probe of 0000:08:00.0 failed with error -2 [ 13.712152] igb 0000:08:00.1: PCI->APIC IRQ transform: INT B -> IRQ 70 [ 13.904537] igb 0000:08:00.1: Intel(R) Gigabit Ethernet Network Connection [ 13.904545] igb 0000:08:00.1: eth0: (PCIe:2.5Gb/s:Width x4) 00:30:48:7b:5d:37 [ 13.904547] igb 0000:08:00.1: eth0: PBA No: Unknown [ 13.904556] igb 0000:08:00.1: Using MSI-X interrupts. 4 rx queue(s), 4 tx queue(s) [ 13.927029] igb 0000:08:00.1 eth1: renamed from eth0 Checking the changelog led us to a stable-4.4.y backport of mainline commit 182785335447 ("igb: reset the PHY before reading the PHY ID") as the most promising suspect and reverting it fixed the issue. I also reproduced the issue with 4.15 kernel, except this time both ports of the card failed to probe: [ 16.826649] igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k [ 16.840784] igb: Copyright (c) 2007-2014 Intel Corporation. [ 16.852176] igb 0000:08:00.0: PCI->APIC IRQ transform: INT A -> IRQ 56 [ 16.867919] igb: probe of 0000:08:00.0 failed with error -2 [ 16.879254] igb 0000:08:00.1: PCI->APIC IRQ transform: INT B -> IRQ 70 [ 16.898178] igb: probe of 0000:08:00.1 failed with error -2 Reverting commit 182785335447 fixed the issue here as well. Michal Kubecek

7 years, 8 months

2
2
0 0

[Linux-stable-mirror] [PATCH 4.15 00/55] 4.15.1-stable review

by Greg Kroah-Hartman

This is the start of the stable review cycle for the 4.15.1 release. There are 55 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Sun Feb 4 14:07:50 UTC 2018. Anything received after that time might be too late. The whole patch series can be found in one patch at: kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.15.1-rc1.gz or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.15.y and the diffstat can be found below. thanks, greg k-h ------------- Pseudo-Shortlog of commits: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Linux 4.15.1-rc1 Matthew Garrett <mjg59(a)google.com> x86/efi: Clarify that reset attack mitigation needs appropriate userspace Dmitry Torokhov <dmitry.torokhov(a)gmail.com> Input: synaptics-rmi4 - do not delete interrupt memory too early Dmitry Torokhov <dmitry.torokhov(a)gmail.com> Input: synaptics-rmi4 - unmask F03 interrupts when port is opened Wei Yongjun <weiyongjun1(a)huawei.com> test_firmware: fix missing unlock on error in config_num_requests_store() Narcisa Ana Maria Vasile <narcisaanamaria12(a)gmail.com> iio: chemical: ccs811: Fix output of IIO_CONCENTRATION channels Fabrice Gasnier <fabrice.gasnier(a)st.com> iio: adc: stm32: fix scan of multiple channels with DMA Stefan Agner <stefan(a)agner.ch> spi: imx: do not access registers while clocks disabled Fabio Estevam <fabio.estevam(a)nxp.com> serial: imx: Only wakeup via RTSDEN bit if the system has RTS/CTS Andy Shevchenko <andriy.shevchenko(a)linux.intel.com> serial: 8250_dw: Revert "Improve clock rate setting" Wei Yongjun <weiyongjun1(a)huawei.com> serial: 8250_uniphier: fix error return code in uniphier_uart_probe() Masahiro Yamada <yamada.masahiro(a)socionext.com> serial: 8250_of: fix return code when probe function fails to get reset Tomas Winkler <tomas.winkler(a)intel.com> mei: me: allow runtime pm for platform with D0i3 Ganesh Mahendran <opensource.ganesh(a)gmail.com> android: binder: use VM_ALLOC to get vm area Martijn Coenen <maco(a)android.com> ANDROID: binder: remove waitqueue when thread exits. Benjamin Herrenschmidt <benh(a)kernel.crashing.org> usb/gadget: Fix "high bandwidth" check in usb_gadget_ep_match_desc() Oliver Neukum <oneukum(a)suse.com> usb: uas: unconditionally bring back host after reset Hemant Kumar <hemantk(a)codeaurora.org> usb: f_fs: Prevent gadget unbind if it is already unbound Johan Hovold <johan(a)kernel.org> USB: serial: simple: add Motorola Tetra driver Shuah Khan <shuah(a)kernel.org> usbip: list: don't list devices attached to vhci_hcd Shuah Khan <shuah(a)kernel.org> usbip: prevent bind loops on devices attached to vhci_hcd Jia-Ju Bai <baijiaju1990(a)gmail.com> USB: serial: io_edgeport: fix possible sleep-in-atomic Oliver Neukum <oneukum(a)suse.com> CDC-ACM: apply quirk for card reader Hans de Goede <hdegoede(a)redhat.com> USB: cdc-acm: Do not log urb submission errors on disconnect Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> USB: serial: pl2303: new device id for Chilitag OKAMOTO Yoshiaki <yokamoto(a)allied-telesis.co.jp> usb: option: Add support for FS040U modem Gaurav Kohli <gkohli(a)codeaurora.org> tty: fix data race between tty_init_dev and flush of buf Gilad Ben-Yossef <gilad(a)benyossef.com> staging: ccree: fix fips event irq handling build Gilad Ben-Yossef <gilad(a)benyossef.com> staging: ccree: NULLify backup_info when unused Dmitry Eremin <dmitry.eremin(a)intel.com> staging: lustre: separate a connection destroy from free struct kib_conn Dan Carpenter <dan.carpenter(a)oracle.com> scsi: storvsc: missing error code in storvsc_probe() Raghava Aditya Renukunta <RaghavaAditya.Renukunta(a)microsemi.com> scsi: aacraid: Fix hang in kdump Raghava Aditya Renukunta <RaghavaAditya.Renukunta(a)microsemi.com> scsi: aacraid: Fix udev inquiry race condition Mike Rapoport <rppt(a)linux.vnet.ibm.com> ima/policy: fix parsing of fsuuid Lyude Paul <lyude(a)redhat.com> igb: Free IRQs when device is hotplugged Jesse Chan <jc(a)linux.com> mtd: nand: denali_pci: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE Jesse Chan <jc(a)linux.com> gpio: ath79: add missing MODULE_DESCRIPTION/LICENSE Jesse Chan <jc(a)linux.com> gpio: iop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE Jesse Chan <jc(a)linux.com> power: reset: zx-reboot: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE Jason Gerecke <killertofu(a)gmail.com> HID: wacom: Fix reporting of touch toggle (WACOM_HID_WD_MUTE_DEVICE) events Aaron Armstrong Skomra <skomra(a)gmail.com> HID: wacom: EKR: ensure devres groups at higher indexes are released Stephan Mueller <smueller(a)chronox.de> crypto: af_alg - whitelist mask and type Ard Biesheuvel <ard.biesheuvel(a)linaro.org> crypto: sha3-generic - fixes for alignment and big endian operation Antoine Tenart <antoine.tenart(a)free-electrons.com> crypto: inside-secure - avoid unmapping DMA memory that was not mapped Antoine Tenart <antoine.tenart(a)free-electrons.com> crypto: inside-secure - fix hash when length is a multiple of a block Junaid Shahid <junaids(a)google.com> crypto: aesni - Fix out-of-bounds access of the AAD buffer in generic-gcm-aesni Junaid Shahid <junaids(a)google.com> crypto: aesni - Fix out-of-bounds access of the data buffer in generic-gcm-aesni Sabrina Dubroca <sd(a)queasysnail.net> crypto: aesni - add wrapper for generic gcm(aes) Sabrina Dubroca <sd(a)queasysnail.net> crypto: aesni - fix typo in generic_gcmaes_decrypt Stephan Mueller <smueller(a)chronox.de> crypto: aesni - handle zero length dst buffer Hauke Mehrtens <hauke(a)hauke-m.de> crypto: ecdh - fix typo in KPP dependency of CRYPTO_ECDH Takashi Iwai <tiwai(a)suse.de> ALSA: hda - Reduce the suspend time consumption for ALC256 Linus Walleij <linus.walleij(a)linaro.org> gpio: Fix kernel stack leak to userspace Patrice Chotard <patrice.chotard(a)st.com> gpio: stmpe: i2c transfer are forbiden in atomic context Joel Stanley <joel(a)jms.id.au> tools/gpio: Fix build error with musl libc Lukas Wunner <lukas(a)wunner.de> Bluetooth: hci_serdev: Init hci_uart proto_lock to avoid oops ------------- Diffstat: Makefile | 4 +- arch/x86/crypto/aesni-intel_asm.S | 199 ++++++--------------- arch/x86/crypto/aesni-intel_glue.c | 70 ++++++-- crypto/Kconfig | 2 +- crypto/af_alg.c | 10 +- crypto/sha3_generic.c | 5 +- drivers/android/binder.c | 12 ++ drivers/android/binder_alloc.c | 2 +- drivers/bluetooth/hci_serdev.c | 1 + drivers/crypto/inside-secure/safexcel_hash.c | 56 ++++-- drivers/firmware/efi/Kconfig | 5 +- drivers/gpio/gpio-ath79.c | 3 + drivers/gpio/gpio-iop.c | 4 + drivers/gpio/gpio-stmpe.c | 20 +-- drivers/gpio/gpiolib.c | 3 + drivers/hid/wacom_sys.c | 24 +-- drivers/hid/wacom_wac.c | 16 +- drivers/iio/adc/stm32-adc.c | 3 +- drivers/iio/chemical/ccs811.c | 13 +- drivers/input/rmi4/rmi_driver.c | 14 +- drivers/input/rmi4/rmi_f03.c | 64 +++++-- drivers/misc/mei/pci-me.c | 5 +- drivers/mtd/nand/denali_pci.c | 4 + drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/power/reset/zx-reboot.c | 4 + drivers/scsi/aacraid/aachba.c | 15 +- drivers/scsi/aacraid/commsup.c | 9 +- drivers/scsi/storvsc_drv.c | 4 +- drivers/spi/spi-imx.c | 15 +- drivers/staging/ccree/ssi_cipher.c | 1 + drivers/staging/ccree/ssi_driver.c | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 7 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 6 +- drivers/tty/serial/8250/8250_dw.c | 30 ++-- drivers/tty/serial/8250/8250_of.c | 5 +- drivers/tty/serial/8250/8250_uniphier.c | 5 +- drivers/tty/serial/imx.c | 14 +- drivers/tty/tty_io.c | 8 +- drivers/tty/tty_ldisc.c | 4 +- drivers/usb/class/cdc-acm.c | 5 +- drivers/usb/gadget/function/f_fs.c | 3 +- drivers/usb/gadget/udc/core.c | 2 +- drivers/usb/serial/Kconfig | 1 + drivers/usb/serial/io_edgeport.c | 1 - drivers/usb/serial/option.c | 5 + drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + drivers/usb/serial/usb-serial-simple.c | 7 + drivers/usb/storage/uas.c | 7 +- include/linux/tty.h | 2 + lib/test_firmware.c | 1 + security/integrity/ima/ima_policy.c | 2 +- sound/pci/hda/patch_realtek.c | 6 +- tools/gpio/gpio-event-mon.c | 1 + tools/usb/usbip/src/usbip_bind.c | 9 + tools/usb/usbip/src/usbip_list.c | 9 + 57 files changed, 425 insertions(+), 310 deletions(-)

7 years, 8 months

5
60
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror February 2018