[PATCH 2/2] KVM: selftests: Add KVM/PV clock selftest to prove timer drift correction - Linux-kselftest-mirror

8 Apr 2024

This test proves that there is an inherent KVM/PV clock drift away from the
guest TSC when KVM decides to update the PV time information structure due
to a KVM_REQ_MASTERCLOCK_UPDATE. This drift is exascerbated when a guest is
using TSC scaling and running at a different frequency to the host TSC [1].
It also proves that KVM_[GS]ET_CLOCK_GUEST API is working to mitigate the
drift from TSC to within ±1ns.
The test simply records the PVTI (PV time information) at time of guest
creation, after KVM has updated it's mapped PVTI structure and once the
correction has taken place.
A singular point in time is then recorded via the guest TSC and is used to
calculate the a PV clock value using each of the 3 PVTI structures.
As seen below a drift of ~3500ns is observed if no correction has taken
place after KVM has updated the PVTI via master clock update. However,
after the correction a delta of at most 1ns can be seen.
* selftests: kvm: pvclock_test
* scaling tsc from 2999999KHz to 1499999KHz
* before=5038374946 uncorrected=5038371437 corrected=5038374945
* delta_uncorrected=3509 delta_corrected=1
Clocksource check code has been borrowed from [2].
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
[2]: https://lore.kernel.org/kvm/20240106083346.29180-1-dongli.zhang@oracle.com/
Signed-off-by: Jack Allister jalliste@amazon.com
CC: David Woodhouse dwmw2@infradead.org
CC: Paul Durrant paul@xen.org
---
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/x86_64/pvclock_test.c       | 223 ++++++++++++++++++
 2 files changed, 224 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/pvclock_test.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 741c7dc16afc..02ee1205bbed 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
 TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86_64 += x86_64/pvclock_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
diff --git a/tools/testing/selftests/kvm/x86_64/pvclock_test.c b/tools/testing/selftests/kvm/x86_64/pvclock_test.c
new file mode 100644
index 000000000000..172ef4d19c60
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pvclock_test.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2024, Amazon.com, Inc. or its affiliates.
+ *
+ * Tests for pvclock API
+ * KVM_SET_CLOCK_GUEST/KVM_GET_CLOCK_GUEST
+ */
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+enum {
+	STAGE_FIRST_BOOT,
+	STAGE_UNCORRECTED,
+	STAGE_CORRECTED,
+	NUM_STAGES
+};
+
+#define KVMCLOCK_GPA 0xc0000000ull
+#define KVMCLOCK_SIZE sizeof(struct pvclock_vcpu_time_info)
+
+static void trigger_pvti_update(vm_paddr_t pvti_pa)
+{
+	/*
+	 * We need a way to trigger KVM to update the fields
+	 * in the PV time info. The easiest way to do this is
+	 * to temporarily switch to the old KVM system time
+	 * method and then switch back to the new one.
+	 */
+	wrmsr(MSR_KVM_SYSTEM_TIME, pvti_pa | KVM_MSR_ENABLED);
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+}
+
+static void guest_code(vm_paddr_t pvti_pa)
+{
+	struct pvclock_vcpu_time_info *pvti_va =
+		(struct pvclock_vcpu_time_info *)pvti_pa;
+
+	struct pvclock_vcpu_time_info pvti_boot;
+	struct pvclock_vcpu_time_info pvti_uncorrected;
+	struct pvclock_vcpu_time_info pvti_corrected;
+	uint64_t cycles_boot;
+	uint64_t cycles_uncorrected;
+	uint64_t cycles_corrected;
+	uint64_t tsc_guest;
+
+	/*
+	 * Setup the KVMCLOCK in the guest & store the original
+	 * PV time structure that is used.
+	 */
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+	pvti_boot = *pvti_va;
+	GUEST_SYNC(STAGE_FIRST_BOOT);
+
+	/*
+	 * Trigger an update of the PVTI, if we calculate
+	 * the KVM clock using this structure we'll see
+	 * a drift from the TSC.
+	 */
+	trigger_pvti_update(pvti_pa);
+	pvti_uncorrected = *pvti_va;
+	GUEST_SYNC(STAGE_UNCORRECTED);
+
+	/*
+	 * The test should have triggered the correction by this
+	 * point in time. We have a copy of each of the PVTI structs
+	 * at each stage now.
+	 *
+	 * Let's sample the timestamp at a SINGLE point in time and
+	 * then calculate what the KVM clock would be using the PVTI
+	 * from each stage.
+	 *
+	 * Then return each of these values to the tester.
+	 */
+	pvti_corrected = *pvti_va;
+	tsc_guest = rdtsc();
+
+	cycles_boot = __pvclock_read_cycles(&pvti_boot, tsc_guest);
+	cycles_uncorrected = __pvclock_read_cycles(&pvti_uncorrected, tsc_guest);
+	cycles_corrected = __pvclock_read_cycles(&pvti_corrected, tsc_guest);
+
+	GUEST_SYNC_ARGS(STAGE_CORRECTED, cycles_boot, cycles_uncorrected,
+			cycles_corrected, 0);
+}
+
+static void run_test(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+	uint64_t ucall_reason;
+	struct pvclock_vcpu_time_info pvti_before;
+	uint64_t before, uncorrected, corrected;
+	int64_t delta_uncorrected, delta_corrected;
+
+	/* Loop through each stage of the test. */
+	while (true) {
+
+		/* Start/restart the running vCPU code. */
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		/* Retrieve and verify our stage. */
+		ucall_reason = get_ucall(vcpu, &uc);
+		TEST_ASSERT(ucall_reason == UCALL_SYNC,
+			    "Unhandled ucall reason=%lu",
+			    ucall_reason);
+
+		/* Run host specific code relating to stage. */
+		switch (uc.args[1]) {
+		case STAGE_FIRST_BOOT:
+			/* Store the KVM clock values before an update. */
+			vm_ioctl(vm, KVM_GET_CLOCK_GUEST, &pvti_before);
+
+			/* Sleep for a set amount of time to induce drift. */
+			sleep(5);
+			break;
+
+		case STAGE_UNCORRECTED:
+			/* Restore the KVM clock values. */
+			vm_ioctl(vm, KVM_SET_CLOCK_GUEST, &pvti_before);
+			break;
+
+		case STAGE_CORRECTED:
+			/* Query the clock information and verify delta. */
+			before = uc.args[2];
+			uncorrected = uc.args[3];
+			corrected = uc.args[4];
+
+			delta_uncorrected = before - uncorrected;
+			delta_corrected = before - corrected;
+
+			pr_info("before=%lu uncorrected=%lu corrected=%lu\n",
+				before, uncorrected, corrected);
+
+			pr_info("delta_uncorrected=%ld delta_corrected=%ld\n",
+				delta_uncorrected, delta_corrected);
+
+			TEST_ASSERT((delta_corrected <= 1) && (delta_corrected >= -1),
+				    "larger than expected delta detected = %ld", delta_corrected);
+			return;
+		}
+	}
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+static void check_clocksource(void)
+{
+	char *clk_name;
+	struct stat st;
+	FILE *fp;
+
+	fp = fopen(CLOCKSOURCE_PATH, "r");
+	if (!fp) {
+		pr_info("failed to open clocksource file: %d; assuming TSC.\n",
+			errno);
+		return;
+	}
+
+	if (fstat(fileno(fp), &st)) {
+		pr_info("failed to stat clocksource file: %d; assuming TSC.\n",
+			errno);
+		goto out;
+	}
+
+	clk_name = malloc(st.st_size);
+	TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n");
+
+	if (!fgets(clk_name, st.st_size, fp)) {
+		pr_info("failed to read clocksource file: %d; assuming TSC.\n",
+			ferror(fp));
+		goto out;
+	}
+
+	TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size),
+		    "clocksource not supported: %s", clk_name);
+out:
+	fclose(fp);
+}
+
+static void configure_pvclock(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	unsigned int gpages;
+
+	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, KVMCLOCK_SIZE);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    KVMCLOCK_GPA, 1, gpages, 0);
+	virt_map(vm, KVMCLOCK_GPA, KVMCLOCK_GPA, gpages);
+
+	vcpu_args_set(vcpu, 1, KVMCLOCK_GPA);
+}
+
+static void configure_scaled_tsc(struct kvm_vcpu *vcpu)
+{
+	uint64_t tsc_khz;
+
+	tsc_khz =  __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+	pr_info("scaling tsc from %ldKHz to %ldKHz\n", tsc_khz, tsc_khz / 2);
+	tsc_khz /= 2;
+	vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *)tsc_khz);
+}
+
+int main(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+
+	check_clocksource();
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	configure_pvclock(vm, vcpu);
+	configure_scaled_tsc(vcpu);
+
+	run_test(vm, vcpu);
+
+	return 0;
+}
-- 
2.40.1