MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits.
detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup.
This fixes boot on some TDX-enabled systems which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo.
Fixes: cb06d8e3d020 ("x86/tme: Detect if TME and MKTME is activated by BIOS", 2018-03-12) Reported-by: Zixi Chen zixchen@redhat.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Xiaoyao Li xiaoyao.li@intel.com Cc: Kai Huang kai.huang@linux.intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@kernel.org Cc: x86@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini pbonzini@redhat.com --- arch/x86/kernel/cpu/intel.c | 178 ++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 87 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 579e34bdf7cd..70ee316a97a9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -181,6 +181,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) return false; }
+#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -332,6 +416,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); }
static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -492,90 +583,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif }
-#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -712,9 +719,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c);
split_lock_init();
On Tue, Jan 30, 2024 at 07:04:00PM +0100, Paolo Bonzini wrote:
MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits.
detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup.
This fixes boot on some TDX-enabled systems which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo.
Fixes: cb06d8e3d020 ("x86/tme: Detect if TME and MKTME is activated by BIOS", 2018-03-12) Reported-by: Zixi Chen zixchen@redhat.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Xiaoyao Li xiaoyao.li@intel.com Cc: Kai Huang kai.huang@linux.intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@kernel.org Cc: x86@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini pbonzini@redhat.com
I've seen the patch before, although by different author and with different commit message, not sure what is going on.
I had concern about that patch and I don't think it was addressed. See the thread:
https://lore.kernel.org/all/20231002224752.33qa2lq7q2w4nqws@box
On 1/31/24 09:39, Kirill A . Shutemov wrote:
On Tue, Jan 30, 2024 at 07:04:00PM +0100, Paolo Bonzini wrote:
MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits.
detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup.
This fixes boot on some TDX-enabled systems which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo.
Fixes: cb06d8e3d020 ("x86/tme: Detect if TME and MKTME is activated by BIOS", 2018-03-12) Reported-by: Zixi Chen zixchen@redhat.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Xiaoyao Li xiaoyao.li@intel.com Cc: Kai Huang kai.huang@linux.intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@kernel.org Cc: x86@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini pbonzini@redhat.com
I've seen the patch before, although by different author and with different commit message, not sure what is going on.
I had concern about that patch and I don't think it was addressed.
Wow, slightly crazy that two people came up with exactly the same patch, including adding the comment before the moved call. And yes, this patch only works until 6.6. :/
The commit that moved get_cpu_address_size(), which has sha id fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach"), was buggy for AMD processors; and it was noticed in the thread you linked, but never addressed. It works more or less by chance because early_init_amd() calls init_amd(), but the x86_phys_bits value remains wrong for most of the boot process. The MTRRs are also initialized wrongly, but that at least doesn't cause a #GP on AMD SME.
I think the correct fix is something like
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..fbc4e60d027c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1589,6 +1589,7 @@ static void __init early_identify_cpu( get_cpu_vendor(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); cpu_parse_early_param();
if (this_cpu->c_early_init) @@ -1601,10 +1602,9 @@ static void __init early_identify_cpu( this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); }
- get_cpu_address_sizes(c); - setup_force_cpu_cap(X86_FEATURE_ALWAYS);
cpu_set_bug_bits(c);
on top of which my (or Jeremy's) patch can be applied. I'll test it and send a v2 of this patch.
Paolo
See the thread:
https://lore.kernel.org/all/20231002224752.33qa2lq7q2w4nqws@box
On Tue, Jan 30, 2024 at 07:04:00PM +0100, Paolo Bonzini wrote:
MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits.
detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup.
This fixes boot on some TDX-enabled systems which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo.
Fixes: cb06d8e3d020 ("x86/tme: Detect if TME and MKTME is activated by BIOS", 2018-03-12) Reported-by: Zixi Chen zixchen@redhat.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Xiaoyao Li xiaoyao.li@intel.com Cc: Kai Huang kai.huang@linux.intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@kernel.org Cc: x86@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini pbonzini@redhat.com
I've seen the patch before, although by different author and with different commit message, not sure what is going on.
I had concern about that patch and I don't think it was addressed. See the thread:
https://lore.kernel.org/all/20231002224752.33qa2lq7q2w4nqws@box
linux-stable-mirror@lists.linaro.org