This series introduces support in the ARM PMUv3 driver for partitioning PMU counters into two separate ranges by taking advantage of the MDCR_EL2.HPMN register field.
The advantage of a partitioned PMU would be to allow KVM guests direct access to a subset of PMU functionality, greatly reducing the overhead of performance monitoring in guests.
While this series could be accepted on its own merits, practically there is a lot more to be done before it will be fully useful, so I'm sending it as an RFC for now.
This series is based on v6.13-rc7. It needs a small additional change after Oliver's debug cleanups series going into 6.14, specifically this patch [1], because that patch changes kvm_arm_setup_mdcr_el2() to initialize HPMN from a cached value read early in the boot process instead of reading from the register. The only sensible way I can see to deal with this is returning to reading the register.
[1] https://lore.kernel.org/kvmarm/20241219224116.3941496-3-oliver.upton@linux.d...
Colton Lewis (4):
  perf: arm_pmuv3: Introduce module param to partition the PMU
  KVM: arm64: Make guests see only counters they can access
  perf: arm_pmuv3: Generalize counter bitmasks
  perf: arm_pmuv3: Keep out of guest counter partition
 arch/arm/include/asm/arm_pmuv3.h       |  10 ++
 arch/arm64/include/asm/arm_pmuv3.h     |  10 ++
 arch/arm64/kvm/pmu-emul.c              |   8 +-
 drivers/perf/arm_pmuv3.c               | 113 ++++++++++++++++--
 include/linux/perf/arm_pmu.h           |   2 +
 include/linux/perf/arm_pmuv3.h         |  34 +++++-
 .../kvm/aarch64/vpmu_counter_access.c  |   2 +-
 7 files changed, 160 insertions(+), 19 deletions(-)
base-commit: 5bc55a333a2f7316b58edc7573e8e893f7acb532 -- 2.48.1.262.g85cc9f2d1e-goog
For PMUv3, the register MDCR_EL2.HPMN partitions the PMU counters into two ranges where counters 0..HPMN-1 are accessible by EL1 and, if allowed, EL0, while counters HPMN..N are only accessible by EL2.
Introduce a module parameter in the PMUv3 driver to set this register. The name reserved_guest_counters reflects the intent to reserve some counters for the guest so they may eventually be allowed direct access to a subset of PMU functionality for increased performance.
Track HPMN and whether the pmu is partitioned in struct arm_pmu.
While FEAT_HPMN0 does allow HPMN to be set to 0, this patch specifically disallows that case because it's not useful given the intention to allow guests access to their own counters.
Signed-off-by: Colton Lewis coltonlewis@google.com --- arch/arm/include/asm/arm_pmuv3.h | 10 +++++++ arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++ drivers/perf/arm_pmuv3.c | 43 ++++++++++++++++++++++++++++-- include/linux/perf/arm_pmu.h | 2 ++ include/linux/perf/arm_pmuv3.h | 7 +++++ 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 2ec0e5e83fc9..49ad90486aa5 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void) return val; }
+static inline u32 read_mdcr(void) +{ + return read_sysreg(mdcr_el2); +} + +static inline void write_mdcr(u32 val) +{ + write_sysreg(val, mdcr_el2); +} + #endif diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 8a777dec8d88..fc37e7e81e07 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver) return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; }
+static inline u64 read_mdcr(void) +{ + return read_sysreg(mdcr_el2); +} + +static inline void write_mdcr(u64 val) +{ + write_sysreg(val, mdcr_el2); +} + #endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..55f9ae560715 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare); GEN_PMU_FORMAT_ATTR(threshold);
static int sysctl_perf_user_access __read_mostly; +static u8 reserved_guest_counters __read_mostly;
static bool armv8pmu_event_is_64bit(struct perf_event *event) { @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val) write_pmcr(val); }
+static u64 armv8pmu_mdcr_read(void) +{ + return read_mdcr(); +} + +static void armv8pmu_mdcr_write(u64 val) +{ + write_mdcr(val); + isb(); +} + +static void armv8pmu_partition(u8 hpmn) +{ + u64 mdcr = armv8pmu_mdcr_read(); + + mdcr &= ~MDCR_EL2_HPMN_MASK; + mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn); + /* Prevent guest counters counting at EL2 */ + mdcr |= ARMV8_PMU_MDCR_HPMD; + + armv8pmu_mdcr_write(mdcr); +} + static int armv8pmu_has_overflowed(u64 pmovsr) { return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK); @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
+ if (cpu_pmu->partitioned) + armv8pmu_partition(cpu_pmu->hpmn); + /* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); armv8pmu_disable_intens(mask); @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu; + u8 pmcr_n; u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver; @@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->pmuver = pmuver; probe->present = true; + pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
/* Read the nb of CNTx counters supported from PMNC */ - bitmap_set(cpu_pmu->cntr_mask, - 0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read())); + bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n); + + if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) { + cpu_pmu->hpmn = reserved_guest_counters; + cpu_pmu->partitioned = true; + } else { + reserved_guest_counters = 0; + cpu_pmu->hpmn = pmcr_n; + cpu_pmu->partitioned = false; + }
/* Add the CPU cycles counter */ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask); @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; } + +module_param(reserved_guest_counters, byte, 0); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4b5b83677e3f..ad97aabed25a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -101,6 +101,8 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); + u8 hpmn; /* MDCR_EL2.HPMN: counter partition pivot */ + bool partitioned; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index d698efba28a2..d399e8c6f98e 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -223,6 +223,13 @@ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
+/* + * Per-CPU MDCR: config reg + */ +#define ARMV8_PMU_MDCR_HPMN GENMASK(4, 0) +#define ARMV8_PMU_MDCR_HPME BIT(7) +#define ARMV8_PMU_MDCR_HPMD BIT(17) + /* * PMOVSR: counters overflow flag status reg */
On Mon, 27 Jan 2025 22:20:27 +0000, Colton Lewis coltonlewis@google.com wrote:
For PMUv3, the register MDCR_EL2.HPMN partitions the PMU counters into two ranges where counters 0..HPMN-1 are accessible by EL1 and, if allowed, EL0, while counters HPMN..N are only accessible by EL2.
Introduce a module parameter in the PMUv3 driver to set this register. The name reserved_guest_counters reflects the intent to reserve some counters for the guest so they may eventually be allowed direct access to a subset of PMU functionality for increased performance.
Track HPMN and whether the pmu is partitioned in struct arm_pmu.
While FEAT_HPMN0 does allow HPMN to be set to 0, this patch specifically disallows that case because it's not useful given the intention to allow guests access to their own counters.
Signed-off-by: Colton Lewis coltonlewis@google.com
arch/arm/include/asm/arm_pmuv3.h | 10 +++++++ arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++ drivers/perf/arm_pmuv3.c | 43 ++++++++++++++++++++++++++++-- include/linux/perf/arm_pmu.h | 2 ++ include/linux/perf/arm_pmuv3.h | 7 +++++ 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 2ec0e5e83fc9..49ad90486aa5 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void) return val; } +static inline u32 read_mdcr(void) +{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u32 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
This will obviously break the 32bit build.
#endif diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 8a777dec8d88..fc37e7e81e07 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver) return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; } +static inline u64 read_mdcr(void) +{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u64 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
#endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..55f9ae560715 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare); GEN_PMU_FORMAT_ATTR(threshold); static int sysctl_perf_user_access __read_mostly; +static u8 reserved_guest_counters __read_mostly; static bool armv8pmu_event_is_64bit(struct perf_event *event) { @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val) write_pmcr(val); } +static u64 armv8pmu_mdcr_read(void) +{
+	return read_mdcr();
+}
+
+static void armv8pmu_mdcr_write(u64 val)
+{
+	write_mdcr(val);
+	isb();
+}
+
+static void armv8pmu_partition(u8 hpmn)
+{
+	u64 mdcr = armv8pmu_mdcr_read();
+
+	mdcr &= ~MDCR_EL2_HPMN_MASK;
+	mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
+	/* Prevent guest counters counting at EL2 */
+	mdcr |= ARMV8_PMU_MDCR_HPMD;
+
+	armv8pmu_mdcr_write(mdcr);
+}
static int armv8pmu_has_overflowed(u64 pmovsr) { return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK); @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info) bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
+	if (cpu_pmu->partitioned)
+		armv8pmu_partition(cpu_pmu->hpmn);
Kaboom, see below.
/* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); armv8pmu_disable_intens(mask); @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu;
+	u8 pmcr_n;
 	u64 pmceid_raw[2];
 	u32 pmceid[2];
 	int pmuver;
@@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info) cpu_pmu->pmuver = pmuver; probe->present = true;
+	pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
/* Read the nb of CNTx counters supported from PMNC */
-	bitmap_set(cpu_pmu->cntr_mask,
-		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Also, how does it work in an asymmetric configuration where some CPUs can satisfy the reservation, and some can't?
+	} else {
+		reserved_guest_counters = 0;
+		cpu_pmu->hpmn = pmcr_n;
+		cpu_pmu->partitioned = false;
+	}
/* Add the CPU cycles counter */ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask); @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; }
+module_param(reserved_guest_counters, byte, 0); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4b5b83677e3f..ad97aabed25a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -101,6 +101,8 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
+	u8 hpmn;		/* MDCR_EL2.HPMN: counter partition pivot */
+	bool partitioned;
 	bool secure_access;	/* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index d698efba28a2..d399e8c6f98e 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -223,6 +223,13 @@ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) +/*
+ * Per-CPU MDCR: config reg
+ */
+#define ARMV8_PMU_MDCR_HPMN	GENMASK(4, 0)
+#define ARMV8_PMU_MDCR_HPME	BIT(7)
+#define ARMV8_PMU_MDCR_HPMD	BIT(17)
I'd rather we find a way to use the existing definitions from the sysreg file rather than add more duplication.
I also don't see how the kernel knows not to access the low-numbered counters at this point. Maybe in a later patch, I'll keep reading.
M.
Hey Marc, thanks for looking.
Marc Zyngier maz@kernel.org writes:
On Mon, 27 Jan 2025 22:20:27 +0000, Colton Lewis coltonlewis@google.com wrote:
For PMUv3, the register MDCR_EL2.HPMN partitions the PMU counters into two ranges where counters 0..HPMN-1 are accessible by EL1 and, if allowed, EL0, while counters HPMN..N are only accessible by EL2.
Introduce a module parameter in the PMUv3 driver to set this register. The name reserved_guest_counters reflects the intent to reserve some counters for the guest so they may eventually be allowed direct access to a subset of PMU functionality for increased performance.
Track HPMN and whether the pmu is partitioned in struct arm_pmu.
While FEAT_HPMN0 does allow HPMN to be set to 0, this patch specifically disallows that case because it's not useful given the intention to allow guests access to their own counters.
Signed-off-by: Colton Lewis coltonlewis@google.com
arch/arm/include/asm/arm_pmuv3.h | 10 +++++++ arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++ drivers/perf/arm_pmuv3.c | 43 ++++++++++++++++++++++++++++-- include/linux/perf/arm_pmu.h | 2 ++ include/linux/perf/arm_pmuv3.h | 7 +++++ 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 2ec0e5e83fc9..49ad90486aa5 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void) return val; }
+static inline u32 read_mdcr(void) +{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u32 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
This will obviously break the 32bit build.
Dang! I did compile with 32 bit arm but I used defconfig which I now realize doesn't define CONFIG_ARM_PMUV3 even though 64 bit arm does.
#endif diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 8a777dec8d88..fc37e7e81e07 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver) return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; }
+static inline u64 read_mdcr(void) +{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u64 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
+
 #endif
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..55f9ae560715 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare); GEN_PMU_FORMAT_ATTR(threshold);
static int sysctl_perf_user_access __read_mostly; +static u8 reserved_guest_counters __read_mostly;
static bool armv8pmu_event_is_64bit(struct perf_event *event) { @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val) write_pmcr(val); }
+static u64 armv8pmu_mdcr_read(void) +{
+	return read_mdcr();
+}
+
+static void armv8pmu_mdcr_write(u64 val)
+{
+	write_mdcr(val);
+	isb();
+}
+
+static void armv8pmu_partition(u8 hpmn)
+{
+	u64 mdcr = armv8pmu_mdcr_read();
+
+	mdcr &= ~MDCR_EL2_HPMN_MASK;
+	mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
+	/* Prevent guest counters counting at EL2 */
+	mdcr |= ARMV8_PMU_MDCR_HPMD;
+
+	armv8pmu_mdcr_write(mdcr);
+}
+
 static int armv8pmu_has_overflowed(u64 pmovsr)
 {
 	return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
@@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
+	if (cpu_pmu->partitioned)
+		armv8pmu_partition(cpu_pmu->hpmn);
Kaboom, see below.
/* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); armv8pmu_disable_intens(mask); @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu;
+	u8 pmcr_n;
 	u64 pmceid_raw[2];
 	u32 pmceid[2];
 	int pmuver;
@@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->pmuver = pmuver; probe->present = true;
+	pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
/* Read the nb of CNTx counters supported from PMNC */
-	bitmap_set(cpu_pmu->cntr_mask,
-		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Trying to access an EL2 register at EL1 can do that. I'll add the appropriate hypercalls.
Also, how does it work in an asymmetric configuration where some CPUs can satisfy the reservation, and some can't?
The CPUs that can't meet the reservation read their own value of PMCR.N, which is below the attempted reservation, and so do not get partitioned. Nothing changes for a CPU that can't meet the reservation.
+	} else {
+		reserved_guest_counters = 0;
+		cpu_pmu->hpmn = pmcr_n;
+		cpu_pmu->partitioned = false;
+	}
/* Add the CPU cycles counter */ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask); @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; }
+module_param(reserved_guest_counters, byte, 0); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4b5b83677e3f..ad97aabed25a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -101,6 +101,8 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
+	u8 hpmn;		/* MDCR_EL2.HPMN: counter partition pivot */
+	bool partitioned;
 	bool secure_access;	/* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index d698efba28a2..d399e8c6f98e 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -223,6 +223,13 @@ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
+/*
+ * Per-CPU MDCR: config reg
+ */
+#define ARMV8_PMU_MDCR_HPMN	GENMASK(4, 0)
+#define ARMV8_PMU_MDCR_HPME	BIT(7)
+#define ARMV8_PMU_MDCR_HPMD	BIT(17)
I'd rather we find a way to use the existing definitions from the sysreg file rather than add more duplication.
Easy enough
I also don't see how the kernel knows not to access the low-numbered counters at this point. Maybe in a later patch, I'll keep reading.
M.
-- Without deviation from the norm, progress is not possible.
Colton Lewis coltonlewis@google.com writes:
Hey Marc, thanks for looking.
*for the review
Marc Zyngier maz@kernel.org writes:
On Mon, 27 Jan 2025 22:20:27 +0000, Colton Lewis coltonlewis@google.com wrote:
/* Read the nb of CNTx counters supported from PMNC */
- bitmap_set(cpu_pmu->cntr_mask,
0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
- bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
- if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
cpu_pmu->hpmn = reserved_guest_counters;
cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Trying to access an EL2 register at EL1 can do that. I'll add the appropriate hypercalls.
Also, how does it work in an asymmetric configuration where some CPUs can satisfy the reservation, and some can't?
The CPUs that can't read their own value of PMCR.N below what the attempted reservation is and so do not get partitioned. Nothing changes for that CPU if it can't meet the reservation.
My mistake. That does not happen. I originally did these comparisons in armv8pmu_reset which runs for every CPU but __armv8pmu_probe_pmu doesn't.
On Tue, 28 Jan 2025 22:08:27 +0000, Colton Lewis coltonlewis@google.com wrote:
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Trying to access an EL2 register at EL1 can do that. I'll add the appropriate hypercalls.
But what about a guest that would get passed the magic parameter? I think you want to prevent that too.
Also, how does it work in an asymmetric configuration where some CPUs can satisfy the reservation, and some can't?
The CPUs that can't meet the reservation read their own value of PMCR.N, which is below the attempted reservation, and so do not get partitioned. Nothing changes for a CPU that can't meet the reservation.
That's not what I meant. The question really is:
- do we want the reservation to be the number of counters reserved for the host
- or do we want it to be for the guest?
On symmetric systems, it doesn't matter. On broken big-little systems, this changes everything (it has a direct impact on userspace's ability to use the counters).
I think the design should reflect a decision on the above.
M.
Marc Zyngier maz@kernel.org writes:
On Tue, 28 Jan 2025 22:08:27 +0000, Colton Lewis coltonlewis@google.com wrote:
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Trying to access an EL2 register at EL1 can do that. I'll add the appropriate hypercalls.
But what about a guest that would get passed the magic parameter? I think you want to prevent that too.
That doesn't matter because the ARM manual states that when HPMN is set, reads of PMCR.N from EL1 return that value, and I've made sure in the second patch that KVM does that. A guest therefore believes it is on a system where reserved_guest_counters/HPMN == PMCR.N, so there is no partition.
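For illustration, the guest-visible counter count could be sketched roughly like this (a hedged sketch of the idea in the second patch, not the exact hunk; guest_visible_pmcr_n() is a hypothetical helper name):

static u8 guest_visible_pmcr_n(struct arm_pmu *arm_pmu)
{
	/*
	 * Hedged sketch: with a partitioned PMU, the guest's view of
	 * PMCR_EL0.N is HPMN, so it never sees the host-reserved range.
	 */
	if (arm_pmu->partitioned)
		return arm_pmu->hpmn;

	return bitmap_weight(arm_pmu->cntr_mask,
			     ARMV8_PMU_MAX_GENERAL_COUNTERS);
}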
Also, how does it work in an asymmetric configuration where some CPUs can satisfy the reservation, and some can't?
The CPUs that can't meet the reservation read their own value of PMCR.N, which is below the attempted reservation, and so do not get partitioned. Nothing changes for a CPU that can't meet the reservation.
That's not what I meant. The question really is:
- do we want the reservation to be the number of counters reserved for the host
- or do we want it to be for the guest?
On symmetric systems, it doesn't matter. On broken big-little systems, this changes everything (it has a direct impact on userspace's ability to use the counters).
I think the design should reflect a decision on the above.
As currently written, and as reflected in the name reserved_guest_counters, this series makes a reservation for the guest.
After talking with Oliver it probably makes more sense to make a reservation for the host. This is closer to the semantics of the underlying CPU feature.
In the limit case it's impossible to leave the host without counters: all valid values of HPMN leave the host at least one, but should we make any guarantees beyond that?
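To make the host-oriented alternative concrete, HPMN could be derived per CPU from that CPU's own PMCR.N (a hedged sketch only; reserved_host_counters and compute_hpmn() are hypothetical names, not part of the posted series):

static u8 compute_hpmn(u8 pmcr_n, u8 reserved_host_counters)
{
	/*
	 * Hypothetical sketch of a host-oriented reservation: the
	 * parameter says how many counters the host keeps, everything
	 * below the pivot (0..HPMN-1) would go to the guest, and the
	 * host always keeps at least one counter.
	 */
	u8 host = clamp_t(u8, reserved_host_counters, 1, pmcr_n);

	return pmcr_n - host;
}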
Colton Lewis coltonlewis@google.com writes:
Marc Zyngier maz@kernel.org writes:
On Tue, 28 Jan 2025 22:08:27 +0000, Colton Lewis coltonlewis@google.com wrote:
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
Isn't this going to completely explode on a kernel running at EL1?
Trying to access an EL2 register at EL1 can do that. I'll add the appropriate hypercalls.
Ooohh. Making this work at EL1 is much more complicated than adding the hypercall to write MDCR because once HPMN takes effect, the upper range of counters that the host will use is only writable at EL2. That means using any register related to any counter in the upper range would also require a hypercall. The only way around that would be to avoid using this feature in the host entirely and only enable it when we load a guest.
I know we don't like feature discrepancies between VHE and nVHE mode, but Oliver thinks it might be justified here to have PMU partitioning be VHE-only.
On Mon, Jan 27, 2025 at 4:26 PM Colton Lewis coltonlewis@google.com wrote:
For PMUv3, the register MDCR_EL2.HPMN partitions the PMU counters into two ranges where counters 0..HPMN-1 are accessible by EL1 and, if allowed, EL0, while counters HPMN..N are only accessible by EL2.
Introduce a module parameter in the PMUv3 driver to set this register. The name reserved_guest_counters reflects the intent to reserve some counters for the guest so they may eventually be allowed direct access to a subset of PMU functionality for increased performance.
Track HPMN and whether the pmu is partitioned in struct arm_pmu.
While FEAT_HPMN0 does allow HPMN to be set to 0, this patch specifically disallows that case because it's not useful given the intention to allow guests access to their own counters.
Signed-off-by: Colton Lewis coltonlewis@google.com
arch/arm/include/asm/arm_pmuv3.h | 10 +++++++ arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++ drivers/perf/arm_pmuv3.c | 43 ++++++++++++++++++++++++++++-- include/linux/perf/arm_pmu.h | 2 ++ include/linux/perf/arm_pmuv3.h | 7 +++++ 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 2ec0e5e83fc9..49ad90486aa5 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void) return val; }
+static inline u32 read_mdcr(void) +{
return read_sysreg(mdcr_el2);
+}
+static inline void write_mdcr(u32 val) +{
write_sysreg(val, mdcr_el2);
+}
#endif diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 8a777dec8d88..fc37e7e81e07 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver) return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; }
+static inline u64 read_mdcr(void) +{
return read_sysreg(mdcr_el2);
+}
+static inline void write_mdcr(u64 val) +{
write_sysreg(val, mdcr_el2);
+}
#endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..55f9ae560715 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare); GEN_PMU_FORMAT_ATTR(threshold);
static int sysctl_perf_user_access __read_mostly; +static u8 reserved_guest_counters __read_mostly;
static bool armv8pmu_event_is_64bit(struct perf_event *event) { @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val) write_pmcr(val); }
+static u64 armv8pmu_mdcr_read(void) +{
return read_mdcr();
+}
+static void armv8pmu_mdcr_write(u64 val) +{
write_mdcr(val);
isb();
+}
+static void armv8pmu_partition(u8 hpmn) +{
u64 mdcr = armv8pmu_mdcr_read();
mdcr &= ~MDCR_EL2_HPMN_MASK;
mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
/* Prevent guest counters counting at EL2 */
mdcr |= ARMV8_PMU_MDCR_HPMD;
armv8pmu_mdcr_write(mdcr);
+}
static int armv8pmu_has_overflowed(u64 pmovsr) { return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK); @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
if (cpu_pmu->partitioned)
armv8pmu_partition(cpu_pmu->hpmn);
/* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); armv8pmu_disable_intens(mask);
@@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu;
u8 pmcr_n; u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver;
@@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->pmuver = pmuver; probe->present = true;
pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()); /* Read the nb of CNTx counters supported from PMNC */
bitmap_set(cpu_pmu->cntr_mask,
0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
cpu_pmu->hpmn = reserved_guest_counters;
cpu_pmu->partitioned = true;
You're storing the same information 3 times. 'partitioned' is just 'reserved_guest_counters != 0' or 'cpu_pmu->hpmn != pmcr_n'.
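A minimal sketch of that alternative (armv8pmu_is_partitioned() is a hypothetical helper, not part of the posted series):

static inline bool armv8pmu_is_partitioned(struct arm_pmu *cpu_pmu, u8 pmcr_n)
{
	/*
	 * Sketch only: derive the partitioned state instead of caching
	 * a bool; pmcr_n stands for the probed number of
	 * general-purpose counters on this CPU.
	 */
	return cpu_pmu->hpmn != 0 && cpu_pmu->hpmn < pmcr_n;
}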
} else {
reserved_guest_counters = 0;
cpu_pmu->hpmn = pmcr_n;
cpu_pmu->partitioned = false;
} /* Add the CPU cycles counter */ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
@@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; }
+module_param(reserved_guest_counters, byte, 0);
Module params are generally discouraged. Since this driver can't be a module, this is a boot time only option. There's little reason this can't be a sysfs setting. There's some complexity in changing this when counters are in use (just reject the change) and when we have asymmetric PMUs. Alternatively, it could be a sysctl like user access.
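For reference, the sysctl route could look roughly like this (a hedged sketch modeled loosely on the existing perf_user_access sysctl in this driver; the knob name, location and policy are assumptions, and a real version would need a handler that rejects changes while counters are in use):

static unsigned int sysctl_reserved_guest_counters;

/* Hedged sketch of a sysctl knob for the reservation; illustrative only. */
static struct ctl_table pmu_partition_sysctl_table[] = {
	{
		.procname	= "reserved_guest_counters",
		.data		= &sysctl_reserved_guest_counters,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_INT_MAX,
	},
};

static void armv8pmu_register_partition_sysctl(void)
{
	register_sysctl("kernel", pmu_partition_sysctl_table);
}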
Rob
Hey Rob, thanks for the review.
Rob Herring robh@kernel.org writes:
On Mon, Jan 27, 2025 at 4:26 PM Colton Lewis coltonlewis@google.com wrote:
@@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->pmuver = pmuver; probe->present = true;
pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
/* Read the nb of CNTx counters supported from PMNC */
bitmap_set(cpu_pmu->cntr_mask,
0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
if (reserved_guest_counters > 0 && reserved_guest_counters <
pmcr_n) {
cpu_pmu->hpmn = reserved_guest_counters;
cpu_pmu->partitioned = true;
You're storing the same information 3 times. 'partitioned' is just 'reserved_guest_counters != 0' or 'cpu_pmu->hpmn != pmcr_n'.
Yes, because code outside this module that isn't already reading PMCR.N will need to know the result of that comparison.
} else {
reserved_guest_counters = 0;
cpu_pmu->hpmn = pmcr_n;
cpu_pmu->partitioned = false;
}
/* Add the CPU cycles counter */ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
@@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; }
+module_param(reserved_guest_counters, byte, 0);
Module params are generally discouraged. Since this driver can't be a module, this is a boot time only option. There's little reason this can't be a sysfs setting. There's some complexity in changing this when counters are in use (just reject the change) and when we have asymmetric PMUs. Alternatively, it could be a sysctl like user access.
I see what you mean. I'm not dealing with those complexities yet. That's why this is an RFC.
Rob
The ARM architecture specifies that when MDCR_EL2.HPMN is set, EL1 and EL0, which includes KVM guests, should read that value for PMCR.N.
Signed-off-by: Colton Lewis coltonlewis@google.com --- arch/arm64/kvm/pmu-emul.c | 8 +++++++- tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 6c5950b9ceac..052ce8c721fe 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -993,12 +993,18 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm) { struct arm_pmu *arm_pmu = kvm->arch.arm_pmu; + u8 limit; + + if (arm_pmu->partitioned) + limit = arm_pmu->hpmn - 1; + else + limit = ARMV8_PMU_MAX_GENERAL_COUNTERS;
/* * The arm_pmu->cntr_mask considers the fixed counter(s) as well. * Ignore those and return only the general-purpose counters. */ - return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS); + return bitmap_weight(arm_pmu->cntr_mask, limit); }
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu) diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index f9c0c86d7e85..4d5acdb66bc2 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -610,7 +610,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ static void run_error_test(uint64_t pmcr_n) { - pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); + pr_debug("Error test with pmcr_n %lu (larger than the host allows)\n", pmcr_n);
test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); destroy_vpmu_vm();
These bitmasks are valid for enable and interrupt registers as well as overflow registers. Generalize the names.
Signed-off-by: Colton Lewis coltonlewis@google.com --- include/linux/perf/arm_pmuv3.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index d399e8c6f98e..115ee39f693a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -230,16 +230,23 @@ #define ARMV8_PMU_MDCR_HPME BIT(7) #define ARMV8_PMU_MDCR_HPMD BIT(17)
+/* + * Counter bitmask layouts for overflow, enable, and interrupts + */ +#define ARMV8_PMU_CNT_MASK_P GENMASK(30, 0) +#define ARMV8_PMU_CNT_MASK_C BIT(31) +#define ARMV8_PMU_CNT_MASK_F BIT_ULL(32) /* arm64 only */ +#define ARMV8_PMU_CNT_MASK_ALL (ARMV8_PMU_CNT_MASK_P | \ + ARMV8_PMU_CNT_MASK_C | \ + ARMV8_PMU_CNT_MASK_F) /* * PMOVSR: counters overflow flag status reg */ -#define ARMV8_PMU_OVSR_P GENMASK(30, 0) -#define ARMV8_PMU_OVSR_C BIT(31) -#define ARMV8_PMU_OVSR_F BIT_ULL(32) /* arm64 only */ +#define ARMV8_PMU_OVSR_P ARMV8_PMU_CNT_MASK_P +#define ARMV8_PMU_OVSR_C ARMV8_PMU_CNT_MASK_C +#define ARMV8_PMU_OVSR_F ARMV8_PMU_CNT_MASK_F /* Mask for writable bits is both P and C fields */ -#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C | \ - ARMV8_PMU_OVSR_F) - +#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_CNT_MASK_ALL /* * PMXEVTYPER: Event selection reg */
If the PMU is partitioned, keep the driver out of the guest counter partition and only use the host counter partition. Partitioning is defined by the MDCR_EL2.HPMN register field and saved in cpu_pmu->hpmn. The range 0..HPMN-1 is accessible by EL1 and EL0 while HPMN..PMCR.N is reserved for EL2.
Define some macros that take HPMN as an argument and construct mutually exclusive bitmaps for testing which partition a particular counter is in. Note that despite their different position in the bitmap, the cycle and instruction counters are always in the guest partition.
Signed-off-by: Colton Lewis coltonlewis@google.com --- drivers/perf/arm_pmuv3.c | 72 +++++++++++++++++++++++++++++----- include/linux/perf/arm_pmuv3.h | 8 ++++ 2 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 55f9ae560715..c61845fad9d9 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -754,15 +754,19 @@ static void armv8pmu_disable_event_irq(struct perf_event *event) armv8pmu_disable_intens(BIT(event->hw.idx)); }
-static u64 armv8pmu_getreset_flags(void) +static u64 armv8pmu_getreset_flags(struct arm_pmu *cpu_pmu) { u64 value;
/* Read */ value = read_pmovsclr();
+ if (cpu_pmu->partitioned) + value &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn); + else + value &= ARMV8_PMU_OVERFLOWED_MASK; + /* Write to clear flags */ - value &= ARMV8_PMU_OVERFLOWED_MASK; write_pmovsclr(value);
return value; @@ -789,6 +793,18 @@ static void armv8pmu_disable_user_access(void) update_pmuserenr(0); }
+static bool armv8pmu_is_guest_part(struct arm_pmu *cpu_pmu, u8 idx) +{ + return cpu_pmu->partitioned && + (BIT(idx) & ARMV8_PMU_GUEST_CNT_PART(cpu_pmu->hpmn)); +} + +static bool armv8pmu_is_host_part(struct arm_pmu *cpu_pmu, u8 idx) +{ + return !cpu_pmu->partitioned || + (BIT(idx) & ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn)); +} + static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) { int i; @@ -797,6 +813,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) if (is_pmuv3p9(cpu_pmu->pmuver)) { u64 mask = 0; for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) { + if (armv8pmu_is_guest_part(cpu_pmu, i)) + continue; if (armv8pmu_event_has_user_read(cpuc->events[i])) mask |= BIT(i); } @@ -805,6 +823,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) /* Clear any unused counters to avoid leaking their contents */ for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) { + if (armv8pmu_is_guest_part(cpu_pmu, i)) + continue; if (i == ARMV8_PMU_CYCLE_IDX) write_pmccntr(0); else if (i == ARMV8_PMU_INSTR_IDX) @@ -850,7 +870,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) armv8pmu_disable_user_access();
/* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + if (cpu_pmu->partitioned) + armv8pmu_mdcr_write(armv8pmu_mdcr_read() | ARMV8_PMU_MDCR_HPME); + else + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
kvm_vcpu_pmu_resync_el0(); } @@ -858,7 +881,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); + if (cpu_pmu->partitioned) + armv8pmu_mdcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_MDCR_HPME); + else + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); }
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) @@ -872,7 +898,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) /* * Get and reset the IRQ flags */ - pmovsr = armv8pmu_getreset_flags(); + pmovsr = armv8pmu_getreset_flags(cpu_pmu);
/* * Did an overflow occur? @@ -930,6 +956,8 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, int idx;
for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) { + if (armv8pmu_is_guest_part(cpu_pmu, idx)) + continue; if (!test_and_set_bit(idx, cpuc->used_mask)) return idx; } @@ -946,6 +974,8 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, * the lower idx must be even. */ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) { + if (armv8pmu_is_guest_part(cpu_pmu, idx)) + continue; if (!(idx & 0x1)) continue; if (!test_and_set_bit(idx, cpuc->used_mask)) { @@ -968,6 +998,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
/* Always prefer to place a cycle counter into the cycle counter. */ if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !cpu_pmu->partitioned && !armv8pmu_event_get_threshold(&event->attr)) { if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask)) return ARMV8_PMU_CYCLE_IDX; @@ -983,6 +1014,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, * may not know how to handle it. */ if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) && + !cpu_pmu->partitioned && !armv8pmu_event_get_threshold(&event->attr) && test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) && !armv8pmu_event_want_user_access(event)) { @@ -994,7 +1026,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, * Otherwise use events counters */ if (armv8pmu_event_is_chained(event)) - return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); else return armv8pmu_get_single_idx(cpuc, cpu_pmu); } @@ -1086,6 +1118,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; }
+static void armv8pmu_reset_host_counters(struct arm_pmu *cpu_pmu) +{ + int idx; + + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) + if (armv8pmu_is_host_part(cpu_pmu, idx)) + armv8pmu_write_evcntr(idx, 0); +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1093,8 +1134,10 @@ static void armv8pmu_reset(void *info)
bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
- if (cpu_pmu->partitioned) + if (cpu_pmu->partitioned) { armv8pmu_partition(cpu_pmu->hpmn); + mask &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn); + }
/* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); @@ -1103,11 +1146,20 @@ static void armv8pmu_reset(void *info) /* Clear the counters we flip at guest entry/exit */ kvm_clr_pmu_events(mask);
+ + pmcr = ARMV8_PMU_PMCR_LC; + /* - * Initialize & Reset PMNC. Request overflow interrupt for - * 64 bit cycle counter but cheat in armv8pmu_write_counter(). + * Initialize & Reset PMNC. Request overflow interrupt for 64 + * bit cycle counter but cheat in armv8pmu_write_counter(). + * + * When partitioned, there is no single bit to reset only the + * host counters. so reset them individually. */ - pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + if (cpu_pmu->partitioned) + armv8pmu_reset_host_counters(cpu_pmu); + else + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C;
/* Enable long event counter support where available */ if (armv8pmu_has_long_event(cpu_pmu)) diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 115ee39f693a..5f8b143794ce 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -247,6 +247,14 @@ #define ARMV8_PMU_OVSR_F ARMV8_PMU_CNT_MASK_F /* Mask for writable bits is both P and C fields */ #define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_CNT_MASK_ALL + +/* Masks for guest and host counter partitions */ +#define ARMV8_PMU_HPMN_CNT_MASK(N) GENMASK((N) - 1, 0) +#define ARMV8_PMU_GUEST_CNT_PART(N) (ARMV8_PMU_HPMN_CNT_MASK(N) | \ + ARMV8_PMU_CNT_MASK_C | \ + ARMV8_PMU_CNT_MASK_F) +#define ARMV8_PMU_HOST_CNT_PART(N) (ARMV8_PMU_CNT_MASK_ALL & \ + ~ARMV8_PMU_GUEST_CNT_PART(N)) /* * PMXEVTYPER: Event selection reg */
On 27/01/2025 22:20, Colton Lewis wrote:
If the PMU is partitioned, keep the driver out of the guest counter partition and only use the host counter partition. Partitioning is defined by the MDCR_EL2.HPMN register field and saved in cpu_pmu->hpmn. The range 0..HPMN-1 is accessible by EL1 and EL0 while HPMN..PMCR.N is reserved for EL2.
Define some macros that take HPMN as an argument and construct mutually exclusive bitmaps for testing which partition a particular counter is in. Note that despite their different position in the bitmap, the cycle and instruction counters are always in the guest partition.
Signed-off-by: Colton Lewis coltonlewis@google.com
drivers/perf/arm_pmuv3.c | 72 +++++++++++++++++++++++++++++----- include/linux/perf/arm_pmuv3.h | 8 ++++ 2 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 55f9ae560715..c61845fad9d9 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -754,15 +754,19 @@ static void armv8pmu_disable_event_irq(struct perf_event *event) armv8pmu_disable_intens(BIT(event->hw.idx)); } -static u64 armv8pmu_getreset_flags(void) +static u64 armv8pmu_getreset_flags(struct arm_pmu *cpu_pmu) { u64 value; /* Read */ value = read_pmovsclr();
+	if (cpu_pmu->partitioned)
+		value &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn);
+	else
+		value &= ARMV8_PMU_OVERFLOWED_MASK;
+
 	/* Write to clear flags */
-	value &= ARMV8_PMU_OVERFLOWED_MASK;
 	write_pmovsclr(value);
return value; @@ -789,6 +793,18 @@ static void armv8pmu_disable_user_access(void) update_pmuserenr(0); } +static bool armv8pmu_is_guest_part(struct arm_pmu *cpu_pmu, u8 idx) +{
+	return cpu_pmu->partitioned &&
+	       (BIT(idx) & ARMV8_PMU_GUEST_CNT_PART(cpu_pmu->hpmn));
+}
+
+static bool armv8pmu_is_host_part(struct arm_pmu *cpu_pmu, u8 idx)
+{
+	return !cpu_pmu->partitioned ||
+	       (BIT(idx) & ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn));
+}
+
 static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) { int i;
@@ -797,6 +813,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) if (is_pmuv3p9(cpu_pmu->pmuver)) { u64 mask = 0; for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) {
+			if (armv8pmu_is_guest_part(cpu_pmu, i))
+				continue;
 			if (armv8pmu_event_has_user_read(cpuc->events[i]))
 				mask |= BIT(i);
 		}
@@ -805,6 +823,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) /* Clear any unused counters to avoid leaking their contents */ for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) {
+		if (armv8pmu_is_guest_part(cpu_pmu, i))
+			continue;
 		if (i == ARMV8_PMU_CYCLE_IDX)
 			write_pmccntr(0);
 		else if (i == ARMV8_PMU_INSTR_IDX)
@@ -850,7 +870,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) armv8pmu_disable_user_access(); /* Enable all counters */
-	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+	if (cpu_pmu->partitioned)
+		armv8pmu_mdcr_write(armv8pmu_mdcr_read() | ARMV8_PMU_MDCR_HPME);
+	else
+		armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
 
 	kvm_vcpu_pmu_resync_el0();
 }
@@ -858,7 +881,10 @@ static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	/* Disable all counters */
-	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
+	if (cpu_pmu->partitioned)
+		armv8pmu_mdcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_MDCR_HPME);
typo: s/armv8pmu_pmcr_read/armv8pmu_mdcr_read
Suzuki
+	else
+		armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
 }
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) @@ -872,7 +898,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) /* * Get and reset the IRQ flags */
- pmovsr = armv8pmu_getreset_flags();
+	pmovsr = armv8pmu_getreset_flags(cpu_pmu);
/* * Did an overflow occur? @@ -930,6 +956,8 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, int idx; for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
+		if (armv8pmu_is_guest_part(cpu_pmu, idx))
+			continue;
 		if (!test_and_set_bit(idx, cpuc->used_mask))
 			return idx;
 	}
@@ -946,6 +974,8 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, * the lower idx must be even. */ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
+		if (armv8pmu_is_guest_part(cpu_pmu, idx))
+			continue;
 		if (!(idx & 0x1))
 			continue;
 		if (!test_and_set_bit(idx, cpuc->used_mask)) {
@@ -968,6 +998,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* Always prefer to place a cycle counter into the cycle counter. */ if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
+	    !cpu_pmu->partitioned &&
 	    !armv8pmu_event_get_threshold(&event->attr)) {
 		if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
 			return ARMV8_PMU_CYCLE_IDX;
@@ -983,6 +1014,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, * may not know how to handle it. */ if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) &&
+	    !cpu_pmu->partitioned &&
 	    !armv8pmu_event_get_threshold(&event->attr) &&
 	    test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) &&
 	    !armv8pmu_event_want_user_access(event)) {
@@ -994,7 +1026,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, * Otherwise use events counters */ if (armv8pmu_event_is_chained(event))
-		return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
+		return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
 	else
 		return armv8pmu_get_single_idx(cpuc, cpu_pmu);
 }
@@ -1086,6 +1118,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static void armv8pmu_reset_host_counters(struct arm_pmu *cpu_pmu) +{
+	int idx;
+
+	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS)
+		if (armv8pmu_is_host_part(cpu_pmu, idx))
+			armv8pmu_write_evcntr(idx, 0);
+}
+
 static void armv8pmu_reset(void *info)
 {
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1093,8 +1134,10 @@ static void armv8pmu_reset(void *info) bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
-	if (cpu_pmu->partitioned)
+	if (cpu_pmu->partitioned) {
 		armv8pmu_partition(cpu_pmu->hpmn);
+		mask &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn);
+	}
/* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(mask); @@ -1103,11 +1146,20 @@ static void armv8pmu_reset(void *info) /* Clear the counters we flip at guest entry/exit */ kvm_clr_pmu_events(mask);
+	pmcr = ARMV8_PMU_PMCR_LC;
+
 	/*
-	 * Initialize & Reset PMNC. Request overflow interrupt for
-	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
+	 * Initialize & Reset PMNC. Request overflow interrupt for 64
+	 * bit cycle counter but cheat in armv8pmu_write_counter().
+	 *
+	 * When partitioned, there is no single bit to reset only the
+	 * host counters. so reset them individually.
 	 */
-	pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+	if (cpu_pmu->partitioned)
+		armv8pmu_reset_host_counters(cpu_pmu);
+	else
+		pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C;
/* Enable long event counter support where available */ if (armv8pmu_has_long_event(cpu_pmu)) diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 115ee39f693a..5f8b143794ce 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -247,6 +247,14 @@ #define ARMV8_PMU_OVSR_F ARMV8_PMU_CNT_MASK_F /* Mask for writable bits is both P and C fields */ #define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_CNT_MASK_ALL
+/* Masks for guest and host counter partitions */ +#define ARMV8_PMU_HPMN_CNT_MASK(N) GENMASK((N) - 1, 0) +#define ARMV8_PMU_GUEST_CNT_PART(N) (ARMV8_PMU_HPMN_CNT_MASK(N) | \
+					 ARMV8_PMU_CNT_MASK_C | \
+					 ARMV8_PMU_CNT_MASK_F)
+#define ARMV8_PMU_HOST_CNT_PART(N)	(ARMV8_PMU_CNT_MASK_ALL & \
+					 ~ARMV8_PMU_GUEST_CNT_PART(N))
 /*
  * PMXEVTYPER: Event selection reg
  */
Suzuki K Poulose suzuki.poulose@arm.com writes:
On 27/01/2025 22:20, Colton Lewis wrote:
/* Disable all counters */
- armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
- if (cpu_pmu->partitioned)
armv8pmu_mdcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_MDCR_HPME);
typo: s/armv8pmu_pmcr_read/armv8pmu_mdcr_read
Good catch. Thanks