On Tue, 25 Feb 2025 00:53:57 +0000, Oliver Upton oliver.upton@linux.dev wrote:
commit 90807748ca3a ("KVM: arm64: Hide SME system registers from guests") added trap handling for SMIDR_EL1, treating it as UNDEFINED as KVM does not support SME. This is right for the most part, however KVM needs to set HCR_EL2.TID1 to _actually_ trap the register.
Unfortunately, this comes with some collateral damage as TID1 forces REVIDR_EL1 and AIDR_EL1 to trap as well. KVM has long treated these registers as "invariant" which is an awful term for the following:
Userspace sees the boot CPU values on all vCPUs
The guest sees the hardware values of the CPU on which a vCPU is scheduled
Keep the plates spinning by adding trap handling for the affected registers and repaint all of the "invariant" crud into terms of identifying an implementation. Yes, at this point we only need to set TID1 on SME hardware, but REVIDR_EL1 and AIDR_EL1 are about to become mutable anyway.
Cc: Mark Brown broonie@kernel.org Cc: stable@vger.kernel.org Fixes: 90807748ca3a ("KVM: arm64: Hide SME system registers from guests") Signed-off-by: Oliver Upton oliver.upton@linux.dev
arch/arm64/include/asm/kvm_arm.h | 4 +- arch/arm64/kvm/sys_regs.c | 175 ++++++++++++++++--------------- 2 files changed, 94 insertions(+), 85 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8d94a6c0ed5c..b01c01e55de5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -92,12 +92,12 @@
- SWIO: Turn set/way invalidates into set/way clean+invalidate
- PTW: Take a stage2 fault if a stage1 walk steps in device memory
- TID3: Trap EL1 reads of group 3 ID registers
- TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
*/
- TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6cd1ea7fb55..f25a157622e3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2483,6 +2483,93 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, return true; } +/*
- For historical (ahem ABI) reasons, KVM treats MIDR_EL1, REVIDR_EL1, and
- AIDR_EL1 as "invariant" registers, meaning userspace cannot change them.
For consistency, using the past tense would make a lot more sense.
- The values made visible to userspace were the register values of the boot
- CPU.
- At the same time, reads from these registers at EL1 previously were not
- trapped, allowing the guest to read the actual hardware value. On big-little
- machines, this means the VM can see different values depending on where a
- given vCPU got scheduled.
- These registers are now trapped as collateral damage from SME, and what
- follows attempts to give a user / guest view consistent with the existing
- ABI.
- */
+static bool access_imp_id_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
+{
- if (p->is_write)
return write_to_read_only(vcpu, p, r);
- switch (reg_to_encoding(r)) {
- case SYS_REVIDR_EL1:
p->regval = read_sysreg(revidr_el1);
break;
- case SYS_AIDR_EL1:
p->regval = read_sysreg(aidr_el1);
break;
- default:
WARN_ON_ONCE(1);
- }
- return true;
+}
+static u64 __ro_after_init boot_cpu_midr_val; +static u64 __ro_after_init boot_cpu_revidr_val; +static u64 __ro_after_init boot_cpu_aidr_val;
+static void init_imp_id_regs(void) +{
- boot_cpu_midr_val = read_sysreg(midr_el1);
- boot_cpu_revidr_val = read_sysreg(revidr_el1);
- boot_cpu_aidr_val = read_sysreg(aidr_el1);
+}
+static int get_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
u64 *val)
+{
- switch (reg_to_encoding(r)) {
- case SYS_MIDR_EL1:
*val = boot_cpu_midr_val;
break;
- case SYS_REVIDR_EL1:
*val = boot_cpu_revidr_val;
break;
- case SYS_AIDR_EL1:
*val = boot_cpu_aidr_val;
break;
- default:
WARN_ON_ONCE(1);
return -EINVAL;
- }
- return 0;
+}
+static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
u64 val)
+{
- u64 expected;
- int ret;
- ret = get_imp_id_reg(vcpu, r, &expected);
- if (ret)
return ret;
- return (expected == val) ? 0 : -EINVAL;
+}
+#define IMPLEMENTATION_ID(reg) { \
- SYS_DESC(SYS_##reg), \
- .access = access_imp_id_reg, \
- .get_user = get_imp_id_reg, \
- .set_user = set_imp_id_reg, \
+} /*
- Architected system registers.
@@ -2532,7 +2619,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
- IMPLEMENTATION_ID(MIDR_EL1), { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
- IMPLEMENTATION_ID(REVIDR_EL1),
/* * ID regs: all ID_SANITISED() entries here must have corresponding @@ -2804,6 +2893,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access },
- IMPLEMENTATION_ID(AIDR_EL1), { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, ID_FILTERED(CTR_EL0, ctr_el0, CTR_EL0_DIC_MASK |
I don't think this is enough. You also need to augment the cp15[] table to handle trapping for the 32bit versions of REVIDR/AIDR.
Thanks,
M.