On Mon, Mar 24, 2025, Mingwei Zhang wrote:
> diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
> index 9159bf1a4730..35f27366c277 100644
> --- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
> @@ -22,6 +22,8 @@ KVM_X86_PMU_OP(init)
>  KVM_X86_PMU_OP_OPTIONAL(reset)
>  KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
>  KVM_X86_PMU_OP_OPTIONAL(cleanup)
> +KVM_X86_PMU_OP(put_guest_context)
> +KVM_X86_PMU_OP(load_guest_context)
For KVM, the "guest_context" part is largely superfluous, as KVM always operates on guest state, e.g. kvm_fpu_{load,put}().
I do think we should squeeze in "mediated" somewhere, otherwise it's hard to see that these are specific to the mediated PMU.
So probably mediated_{load,put}()?
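E.g. a hypothetical sketch of the renaming, just to make the suggestion concrete (not code from the series):

        KVM_X86_PMU_OP(mediated_put)
        KVM_X86_PMU_OP(mediated_load)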
>  #undef KVM_X86_PMU_OP
>  #undef KVM_X86_PMU_OP_OPTIONAL
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7ee74bbbb0aa..4117a382739a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -568,6 +568,10 @@ struct kvm_pmu {
>  	u64 raw_event_mask;
>  	struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
>  	struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
> +	u32 gp_eventsel_base;
> +	u32 gp_counter_base;
> +	u32 fixed_base;
> +	u32 cntr_shift;
Gah, my bad, "shift" was a terrible suggestion. It should be "stride".
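i.e. something like this (sketch only, and as noted below these fields can likely go away entirely):

        u32 cntr_stride;	/* distance between consecutive counter MSRs, not a shift */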
> @@ -306,6 +313,10 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
>  int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
>  void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
>  bool vcpu_pmu_can_enable(struct kvm_vcpu *vcpu);
> +void kvm_pmu_put_guest_pmcs(struct kvm_vcpu *vcpu);
> +void kvm_pmu_load_guest_pmcs(struct kvm_vcpu *vcpu);
> +void kvm_pmu_put_guest_context(struct kvm_vcpu *vcpu);
> +void kvm_pmu_load_guest_context(struct kvm_vcpu *vcpu);
>  bool is_vmware_backdoor_pmc(u32 pmc_idx);
>  bool kvm_rdpmc_in_guest(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 1a7e3a897fdf..7e0d84d50b74 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -175,6 +175,22 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	return 1;
>  }
>
> +static inline void amd_update_msr_base(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	if (kvm_pmu_has_perf_global_ctrl(pmu) ||
> +	    guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
> +		pmu->gp_eventsel_base = MSR_F15H_PERF_CTL0;
> +		pmu->gp_counter_base = MSR_F15H_PERF_CTR0;
> +		pmu->cntr_shift = 2;
> +	} else {
> +		pmu->gp_eventsel_base = MSR_K7_EVNTSEL0;
> +		pmu->gp_counter_base = MSR_K7_PERFCTR0;
> +		pmu->cntr_shift = 1;
> +	}
> +}
[Moving quoted text around to organize responses...]
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 796b7bc4affe..ed17ab198dfb 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -460,6 +460,17 @@ static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits)
>  		pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits);
>  }
>
> +static inline void intel_update_msr_base(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	pmu->gp_eventsel_base = MSR_P6_EVNTSEL0;
> +	pmu->gp_counter_base = fw_writes_is_enabled(vcpu) ?
> +			       MSR_IA32_PMC0 : MSR_IA32_PERFCTR0;
This is wrong. And I unintentionally proved that it's wrong, by goofing when I fixed up this code and using MSR_IA32_PERFCTR0 instead of MSR_IA32_PMC0.
Whether or not the guest supports full-width writes is irrelevant, because support for FW writes doesn't change the width of the counters. Just because the *guest* can't directly write all e.g. 48 bits doesn't mean clobbering bits 47:32 is ok.
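Concretely (illustrative, not code from the series): writes through the legacy counter MSRs only take the low 32 bits and sign-extend bit 31 into the upper bits of the counter, so restoring a saved counter value through them can corrupt the live count:

        wrmsrl(MSR_IA32_PERFCTR0, counter);	/* bits 47:32 sign-extended from bit 31 */
        wrmsrl(MSR_IA32_PMC0, counter);		/* full-width write, all 48 bits preserved */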
Similarly, on the AMD side, using the legacy interface in KVM is unnecessary. The guest may be limited to those MSRs, but KVM has a hard dependency on PMU v2, so just unconditionally use MSR_F15H_PERF_CTR0 (and for the record, because I had to look it up, the newfangled MSRs on AMD are aliased to the legacy MSRs for 0..3).
Very happily, that means the MSRs don't need to be per-PMU, and they don't even need to be configured at runtime for a given vendor. Simply require FW writes on Intel to enable the mediated PMU, and then hardcode the GP base to MSR_IA32_PMC0.
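E.g. something like this sketch (the field names are invented here for illustration; kvm_pmu_ops.PERF_GLOBAL_CTRL matches the suggestion further down):

        /* In kvm_pmu_ops: vendor constants, not per-vCPU state. */
        const u32 EVENTSEL_BASE;
        const u32 COUNTER_BASE;
        const u32 COUNTER_STRIDE;
        const u32 PERF_GLOBAL_CTRL;

        /* Intel: mediated PMU requires full-width writes, hardcode PMC0. */
        .EVENTSEL_BASE		= MSR_P6_EVNTSEL0,
        .COUNTER_BASE		= MSR_IA32_PMC0,
        .COUNTER_STRIDE		= 1,
        .PERF_GLOBAL_CTRL	= MSR_CORE_PERF_GLOBAL_CTRL,

        /* AMD: KVM requires PMU v2, so unconditionally use the F15H MSRs. */
        .EVENTSEL_BASE		= MSR_F15H_PERF_CTL0,
        .COUNTER_BASE		= MSR_F15H_PERF_CTR0,
        .COUNTER_STRIDE		= 2,
        .PERF_GLOBAL_CTRL	= MSR_AMD64_PERF_CNTR_GLOBAL_CTL,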
> +static void amd_put_guest_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, pmu->global_ctrl);
> +	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
> +
> +	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, pmu->global_status);
> +
> +	/* Clear global status bits if non-zero */
> +	if (pmu->global_status)
> +		wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, pmu->global_status);
> +
> +	kvm_pmu_put_guest_pmcs(vcpu);
> +}
> +static void amd_load_guest_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	u64 global_status;
> +
> +	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
Back when I suggested we give up on trying to handle PMCs and eventsels in common x86, this WRMSR didn't exist. Now that it does, I don't see anything that prevents invoking kvm_pmu_{load,put}_guest_pmcs() from common x86; KVM just needs to clear GLOBAL_CTRL before setting eventsels and PMCs.
For the load path:
	/*
	 * Disable all counters before loading event selectors and PMCs so that
	 * KVM doesn't enable or load guest counters while host events are
	 * active.  VMX will enable/disable counters at VM-Enter/VM-Exit by
	 * atomically loading PERF_GLOBAL_CONTROL.  SVM effectively performs
	 * the switch by configuring all events to be GUEST_ONLY.
	 */
	wrmsrl(kvm_pmu_ops.PERF_GLOBAL_CTRL, 0);

	kvm_pmu_load_guest_pmcs(vcpu);

	kvm_pmu_call(mediated_load)(vcpu);
And for the put path, just reverse the ordering:
	/*
	 * Defer handling of PERF_GLOBAL_CTRL to vendor code.  On Intel, it's
	 * atomically cleared on VM-Exit, i.e. doesn't need to be cleared here.
	 */
	kvm_pmu_call(mediated_put)(vcpu);

	kvm_pmu_put_guest_pmcs(vcpu);

	perf_put_guest_context();
On Intel, PERF_GLOBAL_CTRL is cleared on VM-Exit, and on AMD, the vendor hook will clear it. The fact that vendor code sets other MSRs is irrelevant; what matters is that all counters are quiesced.
I think it's still worth having helpers, but they can be static locals.
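Pulling the above together, the common x86 helpers could look something like this (a sketch assembled from the snippets above; kvm_pmu_ops.PERF_GLOBAL_CTRL and the mediated_{load,put} hooks are proposals from this discussion, not code from the posted series):

        static void kvm_pmu_load_guest_context(struct kvm_vcpu *vcpu)
        {
        	/* Quiesce all counters before loading eventsels and PMCs. */
        	wrmsrl(kvm_pmu_ops.PERF_GLOBAL_CTRL, 0);

        	kvm_pmu_load_guest_pmcs(vcpu);

        	/* Vendor code restores GLOBAL_STATUS, and GLOBAL_CTRL on AMD. */
        	kvm_pmu_call(mediated_load)(vcpu);
        }

        static void kvm_pmu_put_guest_context(struct kvm_vcpu *vcpu)
        {
        	kvm_pmu_call(mediated_put)(vcpu);

        	kvm_pmu_put_guest_pmcs(vcpu);

        	perf_put_guest_context();
        }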
> +	kvm_pmu_load_guest_pmcs(vcpu);
> +
> +	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, global_status);
> +	/* Clear host global_status MSR if non-zero. */
> +	if (global_status)
> +		wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, global_status);
> +
> +	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, pmu->global_status);
> +	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, pmu->global_ctrl);
> +}
>  static void intel_pmu_update_msr_intercepts(struct kvm_vcpu *vcpu)
>
> @@ -809,6 +822,50 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
>  	}
>  }
>
> +static void intel_put_guest_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	/* Global ctrl register is already saved at VM-exit. */
> +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, pmu->global_status);
> +
> +	/* Clear hardware MSR_CORE_PERF_GLOBAL_STATUS MSR, if non-zero. */
> +	if (pmu->global_status)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, pmu->global_status);
> +
> +	rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl_hw);
> +
> +	/*
> +	 * Clear hardware FIXED_CTR_CTRL MSR to avoid information leakage and
> +	 * to avoid the guest's fixed counters being accidentally enabled when
> +	 * the host re-enables global ctrl.
> +	 */
> +	if (pmu->fixed_ctr_ctrl_hw)
> +		wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> +
> +	kvm_pmu_put_guest_pmcs(vcpu);
> +}
> +static void intel_load_guest_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	u64 global_status, toggle;
> +
> +	/* Clear host global_ctrl MSR if non-zero. */
> +	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> +
> +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
> +	toggle = pmu->global_status ^ global_status;
> +	if (global_status & toggle)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, global_status & toggle);
> +	if (pmu->global_status & toggle)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, pmu->global_status & toggle);
> +
> +	wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl_hw);
> +
> +	kvm_pmu_load_guest_pmcs(vcpu);
> +}
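With the PMC/eventsel handling moved to common x86 as suggested above, the vendor hooks shrink to just the GLOBAL_{CTRL,STATUS} handling, e.g. for the AMD put path (sketch only; hook name per the earlier renaming suggestion):

        static void amd_mediated_put(struct kvm_vcpu *vcpu)
        {
        	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, pmu->global_ctrl);
        	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);

        	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, pmu->global_status);
        	if (pmu->global_status)
        		wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, pmu->global_status);
        }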