Re: [PATCH 2/2] KVM: selftests: Add KVM/PV clock selftest to prove timer drift correction

19 Apr 2024

On 4/8/2024 3:07 PM, Jack Allister wrote:
...
This test proves that there is an inherent KVM/PV clock drift away from the
guest TSC when KVM decides to update the PV time information structure due
to a KVM_REQ_MASTERCLOCK_UPDATE. This drift is exascerbated when a guest is
using TSC scaling and running at a different frequency to the host TSC [1].
It also proves that KVM_[GS]ET_CLOCK_GUEST API is working to mitigate the
drift from TSC to within ±1ns.
The test simply records the PVTI (PV time information) at time of guest
creation, after KVM has updated it's mapped PVTI structure and once the
correction has taken place.
A singular point in time is then recorded via the guest TSC and is used to
calculate the a PV clock value using each of the 3 PVTI structures.
As seen below a drift of ~3500ns is observed if no correction has taken
place after KVM has updated the PVTI via master clock update. However,
after the correction a delta of at most 1ns can be seen.

selftests: kvm: pvclock_test
scaling tsc from 2999999KHz to 1499999KHz
before=5038374946 uncorrected=5038371437 corrected=5038374945
delta_uncorrected=3509 delta_corrected=1

Clocksource check code has been borrowed from [2].
Signed-off-by: Jack Allister jalliste@amazon.com
CC: David Woodhouse dwmw2@infradead.org
CC: Paul Durrant paul@xen.org

tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/x86_64/pvclock_test.c       | 223 ++++++++++++++++++
 2 files changed, 224 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/pvclock_test.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 741c7dc16afc..02ee1205bbed 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
 TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86_64 += x86_64/pvclock_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
diff --git a/tools/testing/selftests/kvm/x86_64/pvclock_test.c b/tools/testing/selftests/kvm/x86_64/pvclock_test.c
new file mode 100644
index 000000000000..172ef4d19c60
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pvclock_test.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*


Copyright © 2024, Amazon.com, Inc. or its affiliates.







Tests for pvclock API



KVM_SET_CLOCK_GUEST/KVM_GET_CLOCK_GUEST


*/

+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>



+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"



+enum {

STAGE_FIRST_BOOT,
STAGE_UNCORRECTED,
STAGE_CORRECTED,
NUM_STAGES

+};



+#define KVMCLOCK_GPA 0xc0000000ull
+#define KVMCLOCK_SIZE sizeof(struct pvclock_vcpu_time_info)



+static void trigger_pvti_update(vm_paddr_t pvti_pa)
+{

/*
* We need a way to trigger KVM to update the fields


* in the PV time info. The easiest way to do this is


* to temporarily switch to the old KVM system time


* method and then switch back to the new one.


*/


wrmsr(MSR_KVM_SYSTEM_TIME, pvti_pa | KVM_MSR_ENABLED);
wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);

+}



+static void guest_code(vm_paddr_t pvti_pa)
+{

struct pvclock_vcpu_time_info *pvti_va =
(struct pvclock_vcpu_time_info *)pvti_pa;



struct pvclock_vcpu_time_info pvti_boot;
struct pvclock_vcpu_time_info pvti_uncorrected;
struct pvclock_vcpu_time_info pvti_corrected;
uint64_t cycles_boot;
uint64_t cycles_uncorrected;
uint64_t cycles_corrected;
uint64_t tsc_guest;

/*
* Setup the KVMCLOCK in the guest & store the original


* PV time structure that is used.


*/


wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
pvti_boot = *pvti_va;
GUEST_SYNC(STAGE_FIRST_BOOT);

/*
* Trigger an update of the PVTI, if we calculate


* the KVM clock using this structure we'll see


* a drift from the TSC.


*/


trigger_pvti_update(pvti_pa);
pvti_uncorrected = *pvti_va;
GUEST_SYNC(STAGE_UNCORRECTED);

/*
* The test should have triggered the correction by this


* point in time. We have a copy of each of the PVTI structs


* at each stage now.


*


* Let's sample the timestamp at a SINGLE point in time and


* then calculate what the KVM clock would be using the PVTI


* from each stage.


*


* Then return each of these values to the tester.


*/


pvti_corrected = *pvti_va;
tsc_guest = rdtsc();

cycles_boot = __pvclock_read_cycles(&pvti_boot, tsc_guest);
cycles_uncorrected = __pvclock_read_cycles(&pvti_uncorrected, tsc_guest);
cycles_corrected = __pvclock_read_cycles(&pvti_corrected, tsc_guest);

GUEST_SYNC_ARGS(STAGE_CORRECTED, cycles_boot, cycles_uncorrected,
	cycles_corrected, 0);



+}



+static void run_test(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{

struct ucall uc;
uint64_t ucall_reason;
struct pvclock_vcpu_time_info pvti_before;
uint64_t before, uncorrected, corrected;
int64_t delta_uncorrected, delta_corrected;

/* Loop through each stage of the test. */
while (true) {

/* Start/restart the running vCPU code. */


vcpu_run(vcpu);


TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);



/* Retrieve and verify our stage. */


ucall_reason = get_ucall(vcpu, &uc);


TEST_ASSERT(ucall_reason == UCALL_SYNC,


	    "Unhandled ucall reason=%lu",


	    ucall_reason);



/* Run host specific code relating to stage. */


switch (uc.args[1]) {


case STAGE_FIRST_BOOT:


	/* Store the KVM clock values before an update. */


	vm_ioctl(vm, KVM_GET_CLOCK_GUEST, &pvti_before);



	/* Sleep for a set amount of time to induce drift. */


	sleep(5);


	break;



case STAGE_UNCORRECTED:


	/* Restore the KVM clock values. */


	vm_ioctl(vm, KVM_SET_CLOCK_GUEST, &pvti_before);


	break;



case STAGE_CORRECTED:


	/* Query the clock information and verify delta. */


	before = uc.args[2];


	uncorrected = uc.args[3];


	corrected = uc.args[4];



	delta_uncorrected = before - uncorrected;


	delta_corrected = before - corrected;



	pr_info("before=%lu uncorrected=%lu corrected=%lu\n",


		before, uncorrected, corrected);



	pr_info("delta_uncorrected=%ld delta_corrected=%ld\n",


		delta_uncorrected, delta_corrected);



	TEST_ASSERT((delta_corrected <= 1) && (delta_corrected >= -1),


		    "larger than expected delta detected = %ld", delta_corrected);



I'm wondering what's the underling theory that we definitely can achieve
±1ns accuracy? I tested it on a Sapphire Rapids @2100MHz TSC frequency,
and I can see delta_corrected=2 in ~2% cases.

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH 2/2] KVM: selftests: Add KVM/PV clock selftest to prove timer drift correction