AMD processors define an address range that is reserved by HyperTransport and causes a failure if used for guest physical addresses. Avoid selftest failures by reserving those guest physical addresses; the rules are:
- On parts with <40 physical address bits, it is fully hidden from software.
- Before Fam17h, it was always 12G just below 1T, even if there was more RAM above this location. In this case we simply do not use any RAM above 1T.
- On Fam17h and later, it is variable based on SME, and is either just below 2^48 (no encryption) or 2^43 (encryption).
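As a sanity check on those numbers (an illustrative aside, not part of the patch): with 4 KiB pages the 12G hole is 12 << 18 page frames and the 1T boundary is frame number 1 << 28, which is where the constants in the code below come from:

	/* Standalone sketch; page_shift of 12 assumes 4 KiB pages. */
	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		const unsigned long page_shift = 12;

		/* 12 GiB in pages: (12 * 2^30) / 2^12 = 12 * 2^18. */
		const unsigned long num_ht_pages = 12UL << 18;
		assert(num_ht_pages == (12UL << 30) >> page_shift);

		/* 1 TiB as a frame number: 2^40 / 2^12 = 2^28. */
		const unsigned long one_tb_gfn = 1UL << 28;
		assert(one_tb_gfn == (1UL << 40) >> page_shift);

		/* Pre-Fam17h: the hole starts 12 GiB below 1 TiB. */
		printf("HT hole starts at GFN 0x%lx\n",
		       one_tb_gfn - num_ht_pages);
		return 0;
	}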
Fixes: ef4c9f4f6546 ("KVM: selftests: Fix 32-bit truncation of vm_get_max_gfn()")
Cc: stable@vger.kernel.org
Cc: David Matlack <dmatlack@google.com>
Reported-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210805105423.412878-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h |  9 +++
 tools/testing/selftests/kvm/lib/kvm_util.c   |  2 +-
 .../selftests/kvm/lib/x86_64/processor.c     | 67 +++++++++++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 6a1a37f30494..da2b702da71a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -71,6 +71,15 @@ enum vm_guest_mode {
 #endif
 
+#if defined(__x86_64__)
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+#else
+static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+}
+#endif
+
 #define MIN_PAGE_SIZE		(1U << MIN_PAGE_SHIFT)
 #define PTES_PER_MIN_PAGE	ptes_per_page(MIN_PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8f2e0bb1ef96..daf6fdb217a7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -302,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 		    (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
 
 	/* Limit physical addresses to PA-bits. */
-	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+	vm->max_gfn = vm_compute_max_gfn(vm);
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 82c39db91369..b7105692661b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1431,3 +1431,70 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
 
 	return cpuid;
 }
+
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+static inline unsigned x86_family(unsigned int eax)
+{
+	unsigned int x86;
+
+	x86 = (eax >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (eax >> 20) & 0xff;
+
+	return x86;
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+	const unsigned long num_ht_pages = 12 << 18; /* 12 GiB */
+	unsigned long ht_gfn, max_gfn, max_pfn;
+	uint32_t eax, ebx, ecx, edx;
+
+	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+	/* Avoid reserved HyperTransport region on AMD processors.  */
+	eax = ecx = 0;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
+	    ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
+	    edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+		return max_gfn;
+
+	/* On parts with <40 physical address bits, the area is fully hidden */
+	if (vm->pa_bits < 40)
+		return max_gfn;
+
+	eax = 1;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	if (x86_family(eax) < 0x17) {
+		/* Before family 17h, the HyperTransport area is just below 1T.  */
+		ht_gfn = (1 << 28) - num_ht_pages;
+	} else {
+		/*
+		 * Otherwise it's at the top of the physical address
+		 * space, possibly reduced due to SME by bits 11:6 of
+		 * CPUID[0x8000001f].EBX.
+		 */
+		eax = 0x80000008;
+		cpuid(&eax, &ebx, &ecx, &edx);
+		max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;
+
+		eax = 0x80000000;
+		cpuid(&eax, &ebx, &ecx, &edx);
+		if (eax >= 0x8000001f) {
+			eax = 0x8000001f;
+			cpuid(&eax, &ebx, &ecx, &edx);
+			max_pfn >>= (ebx >> 6) & 0x3f;
+		}
+		ht_gfn = max_pfn - num_ht_pages;
+	}
+
+	if (max_gfn < ht_gfn)
+		return max_gfn;
+
+	return ht_gfn - 1;
+}
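For context, the function above relies on the selftests' cpuid() helper, which takes all four registers by pointer, uses EAX (and ECX) as inputs, and overwrites all of them with the leaf's outputs. A minimal sketch of such a wrapper (an illustration of the assumed interface, not necessarily the exact selftests implementation) is:

	static inline void cpuid(uint32_t *eax, uint32_t *ebx,
				 uint32_t *ecx, uint32_t *edx)
	{
		/* ECX doubles as the subleaf index on input. */
		asm volatile("cpuid"
			     : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
			     : "0" (*eax), "2" (*ecx)
			     : "memory");
	}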
On Thu, Dec 09, 2021, Paolo Bonzini wrote:
> +unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
> +{
> +	const unsigned long num_ht_pages = 12 << 18; /* 12 GiB */
> +	unsigned long ht_gfn, max_gfn, max_pfn;
> +	uint32_t eax, ebx, ecx, edx;
> +
> +	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
> +
> +	/* Avoid reserved HyperTransport region on AMD processors.  */
> +	eax = ecx = 0;
> +	cpuid(&eax, &ebx, &ecx, &edx);
> +	if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
> +	    ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
> +	    edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
> +		return max_gfn;
> +
> +	/* On parts with <40 physical address bits, the area is fully hidden */
> +	if (vm->pa_bits < 40)
> +		return max_gfn;
> +
> +	eax = 1;
> +	cpuid(&eax, &ebx, &ecx, &edx);
> +	if (x86_family(eax) < 0x17) {
> +		/* Before family 17h, the HyperTransport area is just below 1T.  */
> +		ht_gfn = (1 << 28) - num_ht_pages;
> +	} else {
> +		/*
> +		 * Otherwise it's at the top of the physical address
> +		 * space, possibly reduced due to SME by bits 11:6 of
> +		 * CPUID[0x8000001f].EBX.
> +		 */
> +		eax = 0x80000008;
> +		cpuid(&eax, &ebx, &ecx, &edx);
Shouldn't this check 0x80000000.eax >= 0x80000008 first? Or do we just accept failure if family == 0x17 and there's no 0x80000008? One paranoid option would be to use the pre-fam17h value, e.g.
	/* Before family 17h, the HyperTransport area is just below 1T. */
	ht_gfn = (1 << 28) - num_ht_pages;
	if (x86_family(eax) < 0x17)
		goto out;

	eax = 0x80000000;
	cpuid(&eax, &ebx, &ecx, &edx);
	max_ext_leaf = eax;

	/* Use the old, conservative value if MAXPHYADDR isn't enumerated. */
	if (max_ext_leaf < 0x80000008)
		goto out;

	/* comment */
	eax = 0x80000008;
	cpuid(&eax, &ebx, &ecx, &edx);
	max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;

	if (max_ext_leaf >= 0x8000001f) {
		<adjust>
	}
	ht_gfn = max_pfn - num_ht_pages;

out:
	return min(max_gfn, ht_gfn - 1);
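For readability, here is how that restructuring might look once assembled, with the SME adjustment from the original patch filling the <adjust> placeholder (max_ext_leaf and the goto-based fallback come from the sketch above, everything else from the posted function; this is an assembled illustration, not necessarily the final committed code):

	unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
	{
		const unsigned long num_ht_pages = 12 << 18; /* 12 GiB */
		unsigned long ht_gfn, max_gfn, max_pfn;
		uint32_t max_ext_leaf, eax, ebx, ecx, edx;

		max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;

		/* Avoid reserved HyperTransport region on AMD processors.  */
		eax = ecx = 0;
		cpuid(&eax, &ebx, &ecx, &edx);
		if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
		    ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
		    edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
			return max_gfn;

		/* On parts with <40 physical address bits, the area is fully hidden */
		if (vm->pa_bits < 40)
			return max_gfn;

		/* Before family 17h, the HyperTransport area is just below 1T.  */
		eax = 1;
		cpuid(&eax, &ebx, &ecx, &edx);
		ht_gfn = (1 << 28) - num_ht_pages;
		if (x86_family(eax) < 0x17)
			goto out;

		/* Use the old, conservative value if MAXPHYADDR isn't enumerated. */
		eax = 0x80000000;
		cpuid(&eax, &ebx, &ecx, &edx);
		max_ext_leaf = eax;
		if (max_ext_leaf < 0x80000008)
			goto out;

		/*
		 * Otherwise the region is at the top of the physical address
		 * space, possibly reduced due to SME by bits 11:6 of
		 * CPUID[0x8000001f].EBX.
		 */
		eax = 0x80000008;
		cpuid(&eax, &ebx, &ecx, &edx);
		max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;
		if (max_ext_leaf >= 0x8000001f) {
			eax = 0x8000001f;
			cpuid(&eax, &ebx, &ecx, &edx);
			max_pfn >>= (ebx >> 6) & 0x3f;
		}

		ht_gfn = max_pfn - num_ht_pages;
	out:
		return min(max_gfn, ht_gfn - 1);
	}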
> +		max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;
LOL, "& 255", you just couldn't resist, huh? My version of Rami Code only goes up to 15. :-)
On 12/9/21 22:47, Sean Christopherson wrote:
> > +		/*
> > +		 * Otherwise it's at the top of the physical address
> > +		 * space, possibly reduced due to SME by bits 11:6 of
> > +		 * CPUID[0x8000001f].EBX.
> > +		 */
> > +		eax = 0x80000008;
> > +		cpuid(&eax, &ebx, &ecx, &edx);
> 
> Shouldn't this check 0x80000000.eax >= 0x80000008 first? Or do we just accept
> failure if family == 0x17 and there's no 0x80000008? One paranoid option would
> be to use the pre-fam17h value, e.g.
> 
> 	/* Before family 17h, the HyperTransport area is just below 1T. */
> 	ht_gfn = (1 << 28) - num_ht_pages;
> 	if (x86_family(eax) < 0x17)
> 		goto out;
> 
> 	eax = 0x80000000;
> 	cpuid(&eax, &ebx, &ecx, &edx);
> 	max_ext_leaf = eax;
> 
> 	/* Use the old, conservative value if MAXPHYADDR isn't enumerated. */
> 	if (max_ext_leaf < 0x80000008)
> 		goto out;
Yes, this works for me too. Though in practice I don't think any 64-bit machine ever existed without 0x80000008 (you need it to decide what's a canonical address and what isn't), so that would have to be a 32-bit fam17h machine.
Paolo
> 	/* comment */
> 	eax = 0x80000008;
> 	cpuid(&eax, &ebx, &ecx, &edx);
> 	max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;
> 
> 	if (max_ext_leaf >= 0x8000001f) {
> 		<adjust>
> 	}
> 	ht_gfn = max_pfn - num_ht_pages;
> 
> out:
> 	return min(max_gfn, ht_gfn - 1);
> 
> > +	max_pfn = (1ULL << ((eax & 255) - vm->page_shift)) - 1;
> 
> LOL, "& 255", you just couldn't resist, huh? My version of Rami Code only goes
> up to 15. :-)