From: Kan Liang <kan.liang@linux.intel.com>
The WARN_ON(this_cpu_read(cpu_hw_events.enabled)) in
intel_pmu_save_and_restart_reload() is triggered when doing a sampling
read of topdown events.

In an NMI handler, cpu_hw_events.enabled is set directly to reflect the
state of the core PMU; the generic pmu->pmu_disable_count, which is only
updated by the perf_pmu_disable()/perf_pmu_enable() pair, is not touched.
However, that pair is invoked from the NMI handler when handling a
sampling read. Because the NMI handler disabled the PMU without going
through pmu_disable_count, the trailing perf_pmu_enable() drops the count
back to zero and mistakenly sets cpuc->enabled again, which later trips
the WARN_ON().

Avoid the perf_pmu_disable()/perf_pmu_enable() pair if the core PMU is
already disabled.
Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: stable@vger.kernel.org
---
A new patch to fix an issue found on a legacy platform. (It is not
related to the counters snapshotting feature.)

However, since it also touches the sampling-read code, the patches that
enable the counters snapshotting feature must go on top of this patch.
This patch itself can be applied separately.
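For reference, the reason the pair flips cpuc->enabled back on:
perf_pmu_disable()/perf_pmu_enable() only call into the PMU driver on the
0<->1 transitions of pmu_disable_count. A simplified sketch of
kernel/events/core.c (not the exact code):

void perf_pmu_disable(struct pmu *pmu)
{
	int *count = this_cpu_ptr(pmu->pmu_disable_count);

	if (!(*count)++)
		pmu->pmu_disable(pmu);	/* x86 core PMU: x86_pmu_disable(), cpuc->enabled = 0 */
}

void perf_pmu_enable(struct pmu *pmu)
{
	int *count = this_cpu_ptr(pmu->pmu_disable_count);

	if (!--(*count))
		pmu->pmu_enable(pmu);	/* x86 core PMU: x86_pmu_enable(), cpuc->enabled = 1 */
}

The NMI handler disables the core PMU by clearing cpuc->enabled directly,
so pmu_disable_count is still 0 when the sampling-read path runs; the
enable half of the pair therefore ends up re-enabling the PMU and setting
cpuc->enabled = 1 while the handler still expects the PMU to be off.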
 arch/x86/events/intel/core.c | 7 +++++--
 arch/x86/events/intel/ds.c   | 9 ++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2a2824e9c50d..bce423ad3fad 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2778,15 +2778,18 @@ DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
 static void intel_pmu_read_topdown_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int pmu_enabled = cpuc->enabled;
 
 	/* Only need to call update_topdown_event() once for group read. */
 	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
 	    !is_slots_event(event))
 		return;
 
-	perf_pmu_disable(event->pmu);
+	if (pmu_enabled)
+		perf_pmu_disable(event->pmu);
 	static_call(intel_pmu_update_topdown_event)(event);
-	perf_pmu_enable(event->pmu);
+	if (pmu_enabled)
+		perf_pmu_enable(event->pmu);
 }
 
 static void intel_pmu_read_event(struct perf_event *event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba74e1198328..81b6ec8e824e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2096,11 +2096,14 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 
 void intel_pmu_auto_reload_read(struct perf_event *event)
 {
-	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
+	int pmu_enabled = this_cpu_read(cpu_hw_events.enabled);
 
-	perf_pmu_disable(event->pmu);
+	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
+	if (pmu_enabled)
+		perf_pmu_disable(event->pmu);
 	intel_pmu_drain_pebs_buffer();
-	perf_pmu_enable(event->pmu);
+	if (pmu_enabled)
+		perf_pmu_enable(event->pmu);
 }
 
 /*
On Wed, Jan 15, 2025 at 10:43:16AM -0800, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> The WARN_ON(this_cpu_read(cpu_hw_events.enabled)) in
> intel_pmu_save_and_restart_reload() is triggered when doing a sampling
> read of topdown events.
> 
> In an NMI handler, cpu_hw_events.enabled is set directly to reflect the
> state of the core PMU; the generic pmu->pmu_disable_count, which is only
> updated by the perf_pmu_disable()/perf_pmu_enable() pair, is not
> touched. However, that pair is invoked from the NMI handler when
> handling a sampling read. Because the NMI handler disabled the PMU
> without going through pmu_disable_count, the trailing perf_pmu_enable()
> drops the count back to zero and mistakenly sets cpuc->enabled again,
> which later trips the WARN_ON().
> 
> Avoid the perf_pmu_disable()/perf_pmu_enable() pair if the core PMU is
> already disabled.
> 
> Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Cc: stable@vger.kernel.org
> ---
>  arch/x86/events/intel/core.c | 7 +++++--
>  arch/x86/events/intel/ds.c   | 9 ++++++---
>  2 files changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 2a2824e9c50d..bce423ad3fad 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2778,15 +2778,18 @@ DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
>  static void intel_pmu_read_topdown_event(struct perf_event *event)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	int pmu_enabled = cpuc->enabled;
>  
>  	/* Only need to call update_topdown_event() once for group read. */
>  	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
>  	    !is_slots_event(event))
>  		return;
>  
> -	perf_pmu_disable(event->pmu);
> +	if (pmu_enabled)
> +		perf_pmu_disable(event->pmu);
>  	static_call(intel_pmu_update_topdown_event)(event);
> -	perf_pmu_enable(event->pmu);
> +	if (pmu_enabled)
> +		perf_pmu_enable(event->pmu);
>  }
>  
>  static void intel_pmu_read_event(struct perf_event *event)
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index ba74e1198328..81b6ec8e824e 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -2096,11 +2096,14 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
>  
>  void intel_pmu_auto_reload_read(struct perf_event *event)
>  {
> -	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
> +	int pmu_enabled = this_cpu_read(cpu_hw_events.enabled);
>  
> -	perf_pmu_disable(event->pmu);
> +	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
> +	if (pmu_enabled)
> +		perf_pmu_disable(event->pmu);
>  	intel_pmu_drain_pebs_buffer();
> -	perf_pmu_enable(event->pmu);
> +	if (pmu_enabled)
> +		perf_pmu_enable(event->pmu);
>  }
>  
>  /*
Hurmp.. would it not be nicer to merge that logic. Perhaps something like so?
---
 arch/x86/events/intel/core.c | 41 +++++++++++++++++++++++------------------
 arch/x86/events/intel/ds.c   | 11 +----------
 arch/x86/events/perf_event.h |  2 +-
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7601196d1d18..5b491a6815e6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2785,28 +2785,33 @@ static u64 icl_update_topdown_event(struct perf_event *event)
 
 DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
 
-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) {
+		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+		bool pmu_enabled = cpuc->enabled;
 
-	/* Only need to call update_topdown_event() once for group read. */
-	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
-	    !is_slots_event(event))
-		return;
+		/* Only need to call update_topdown_event() once for group read. */
+		if (is_topdown_event(event) && !is_slots_event(event) &&
+		    (cpuc->txn_flags & PERF_PMU_TXN_READ))
+			return;
 
-	perf_pmu_disable(event->pmu);
-	static_call(intel_pmu_update_topdown_event)(event);
-	perf_pmu_enable(event->pmu);
-}
+		cpuc->enabled = 0;
+		if (pmu_enabled)
+			intel_pmu_disable_all();
 
-static void intel_pmu_read_event(struct perf_event *event)
-{
-	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
-		intel_pmu_auto_reload_read(event);
-	else if (is_topdown_count(event))
-		intel_pmu_read_topdown_event(event);
-	else
-		x86_perf_event_update(event);
+		if (is_topdown_event(event))
+			static_call(intel_pmu_update_topdown_event)(event);
+		else
+			intel_pmu_drain_pebs_buffer();
+
+		cpuc->enabled = pmu_enabled;
+		if (pmu_enabled)
+			intel_pmu_enable_all(0);
+		return;
+	}
+
+	x86_perf_event_update(event);
 }
 
 static void intel_pmu_enable_fixed(struct perf_event *event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba74e1198328..050098c54ae7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -953,7 +953,7 @@ int intel_pmu_drain_bts_buffer(void)
 	return 1;
 }
 
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
 {
 	struct perf_sample_data data;
 
@@ -2094,15 +2094,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 	return NULL;
 }
 
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
-	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
-	perf_pmu_disable(event->pmu);
-	intel_pmu_drain_pebs_buffer();
-	perf_pmu_enable(event->pmu);
-}
-
 /*
  * Special variant of intel_pmu_save_and_restart() for auto-reload.
  */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 31c2771545a6..38b3e30f8988 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1643,7 +1643,7 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx,
 			       bool sched_in);
 
-void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_drain_pebs_buffer(void);
 
 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
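For readability, with the hunks above applied intel_pmu_read_event() ends
up looking roughly like this (reconstructed from the diff; indentation
approximate, second comment added for clarity):

static void intel_pmu_read_event(struct perf_event *event)
{
	if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) {
		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
		bool pmu_enabled = cpuc->enabled;

		/* Only need to call update_topdown_event() once for group read. */
		if (is_topdown_event(event) && !is_slots_event(event) &&
		    (cpuc->txn_flags & PERF_PMU_TXN_READ))
			return;

		/*
		 * Bypass perf_pmu_disable(); save and restore cpuc->enabled by
		 * hand so this also works from the NMI handler, where the PMU
		 * is already disabled.
		 */
		cpuc->enabled = 0;
		if (pmu_enabled)
			intel_pmu_disable_all();

		if (is_topdown_event(event))
			static_call(intel_pmu_update_topdown_event)(event);
		else
			intel_pmu_drain_pebs_buffer();

		cpuc->enabled = pmu_enabled;
		if (pmu_enabled)
			intel_pmu_enable_all(0);
		return;
	}

	x86_perf_event_update(event);
}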
On Thu, Jan 16, 2025 at 11:32:56AM +0100, Peter Zijlstra wrote:
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index ba74e1198328..050098c54ae7 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -953,7 +953,7 @@ int intel_pmu_drain_bts_buffer(void)
>  	return 1;
>  }
>  
> -static inline void intel_pmu_drain_pebs_buffer(void)
> +void intel_pmu_drain_pebs_buffer(void)
>  {
>  	struct perf_sample_data data;
Also, while poking there, I noticed we never did the below :/
---
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5b491a6815e6..20a2556de645 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3081,7 +3081,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 
 		handled++;
 		x86_pmu_handle_guest_pebs(regs, &data);
-		x86_pmu.drain_pebs(regs, &data);
+		static_call(x86_pmu_drain_pebs)(regs, &data);
 		status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
 
 		/*
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 050098c54ae7..75e5e6678282 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -956,8 +956,7 @@ int intel_pmu_drain_bts_buffer(void)
 void intel_pmu_drain_pebs_buffer(void)
 {
 	struct perf_sample_data data;
-
-	x86_pmu.drain_pebs(NULL, &data);
+	static_call(x86_pmu_drain_pebs)(NULL, &data);
 }
 
 /*
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 38b3e30f8988..fb635e2b9909 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1107,6 +1107,7 @@ extern struct x86_pmu x86_pmu __read_mostly;
 
 DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
 DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
+DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
 
 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
 {
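Presumably the complete version also needs the DEFINE and the
static_call_update() hook-up next to the existing x86_pmu_update /
x86_pmu_set_period static calls in arch/x86/events/core.c; something like
the sketch below (not part of the diff above):

/* arch/x86/events/core.c -- sketch only, not shown in the diff above */
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);

static void x86_pmu_static_call_update(void)
{
	/* ... existing static_call_update() calls ... */
	static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
}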