Extend the vDSO for fast-path access to auxiliary clocks (CLOCK_AUX). The implementation is based on the generic vDSO infrastructure and works for all architectures supported by it, namely x86, arm, arm64, riscv, powerpc, loongarch and s390. No changes to userspace are necessary.
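For illustration, a minimal userspace sketch (not part of the series): it assumes CLOCK_AUX is clockid 16 and that auxiliary clock 0 has been enabled through /sys/kernel/time/aux_clocks/0/aux_clock_enable, matching the selftest added later in this series. With the series applied, the clock_gettime() call below is served by the vDSO fast path instead of entering the kernel:

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_AUX
#define CLOCK_AUX 16
#endif

int main(void)
{
	struct timespec ts;

	/* Served by the vDSO fast path; fails with ENODEV while the clock is disabled */
	if (clock_gettime(CLOCK_AUX, &ts)) {
		perror("clock_gettime(CLOCK_AUX)");
		return 1;
	}
	printf("aux clock 0: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}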
Based on timers/ptp of tip.git.
This also depends on v6.16-rc2 *exactly*. The specific dependency is commit 11fcf368506d ("uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again"), which is available in v6.16-rc2. Unfortunately that got broken again in v6.16-rc3 by commit fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources"). Another fix for this is pending [0] and should make it into v6.16.
[0] https://lore.kernel.org/lkml/20250630-uapi-genmask-v1-1-eb0ad956a83e@linutro...
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
Thomas Weißschuh (14):
      selftests/timers: Add testcase for auxiliary clocks
      vdso/vsyscall: Introduce a helper to fill clock configurations
      vdso/vsyscall: Split up __arch_update_vsyscall() into __arch_update_vdso_clock()
      vdso/helpers: Add helpers for seqlocks of single vdso_clock
      vdso/gettimeofday: Return bool from clock_getres() helpers
      vdso/gettimeofday: Return bool from clock_gettime() helpers
      vdso/gettimeofday: Introduce vdso_clockid_valid()
      vdso/gettimeofday: Introduce vdso_set_timespec()
      vdso/gettimeofday: Introduce vdso_get_timestamp()
      vdso: Introduce aux_clock_resolution_ns()
      vdso/vsyscall: Update auxiliary clock data in the datapage
      vdso/gettimeofday: Add support for auxiliary clocks
      Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers"
      selftests/timers/auxclock: Test vDSO functionality
 arch/arm64/include/asm/vdso/vsyscall.h    |   7 +-
 include/asm-generic/vdso/vsyscall.h       |   6 +-
 include/linux/timekeeper_internal.h       |  13 +
 include/vdso/auxclock.h                   |  13 +
 include/vdso/datapage.h                   |   5 +
 include/vdso/helpers.h                    |  40 ++-
 kernel/time/namespace.c                   |   5 +
 kernel/time/timekeeping.c                 |  18 +-
 kernel/time/vsyscall.c                    |  70 ++++--
 lib/vdso/gettimeofday.c                   | 212 ++++++++++------
 tools/testing/selftests/timers/.gitignore |   1 +
 tools/testing/selftests/timers/Makefile   |   2 +-
 tools/testing/selftests/timers/auxclock.c | 406 ++++++++++++++++++++++++++++++
 tools/testing/selftests/vDSO/Makefile     |   2 -
 tools/testing/selftests/vDSO/parse_vdso.c |   3 +-
 15 files changed, 683 insertions(+), 120 deletions(-)
---
base-commit: 4e83b31e48cf2e62aeaed5cd9875c851e36a90d9
change-id: 20250630-vdso-auxclock-97abdf8e042a
Best regards,
Auxiliary clocks behave differently from regular ones: they have to be enabled explicitly through sysfs, can be set and frequency-adjusted independently, and report ENODEV while disabled. Add a testcase to validate this functionality.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 tools/testing/selftests/timers/.gitignore |   1 +
 tools/testing/selftests/timers/Makefile   |   2 +-
 tools/testing/selftests/timers/auxclock.c | 319 ++++++++++++++++++++++++++++++
 3 files changed, 321 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore index bb5326ff900b8edc3aa2d8d596599973593fbaf0..dcee43b3ecd9351c9bb0483088d712ccd7b57367 100644 --- a/tools/testing/selftests/timers/.gitignore +++ b/tools/testing/selftests/timers/.gitignore @@ -20,3 +20,4 @@ valid-adjtimex adjtick set-tz freq-step +auxclock diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 32203593c62e1e0cdfd3de6f567ea1e82913f2ef..3a8833b3fb7449495c66a92c4d82e35a6755b5e8 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -5,7 +5,7 @@ LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ - inconsistency-check raw_skew threadtest rtcpie + inconsistency-check raw_skew threadtest rtcpie auxclock
DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch freq-step leap-a-day \ diff --git a/tools/testing/selftests/timers/auxclock.c b/tools/testing/selftests/timers/auxclock.c new file mode 100644 index 0000000000000000000000000000000000000000..0ba2f9996114ade3147f0f3aec49904556a23cd4 --- /dev/null +++ b/tools/testing/selftests/timers/auxclock.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Work around type conflicts between libc and the UAPI headers */ +#define _SYS_TIME_H +#define __timeval_defined +#define _GNU_SOURCE + +#include <fcntl.h> +#include <linux/types.h> +#include <linux/timex.h> +#include <sched.h> +#include <stdio.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#ifndef CLOCK_AUX +#define CLOCK_AUX 16 +#endif + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000ULL +#endif + +#define AUXCLOCK_SELFTEST_TIMENS_OFFSET 10000 + +static int configure_auxclock(__kernel_clockid_t clockid, bool enable) +{ + char path[100]; + int fd, ret; + + ret = snprintf(path, sizeof(path), + "/sys/kernel/time/aux_clocks/%d/aux_clock_enable", + (int)clockid - CLOCK_AUX); + if (ret >= sizeof(path)) + return -ENOSPC; + + fd = open(path, O_WRONLY); + if (fd == -1) + return -errno; + + /* Always disable to reset */ + ret = dprintf(fd, "0\n"); + if (enable) + ret = dprintf(fd, "1\n"); + close(fd); + + if (ret < 0) + return ret; + + return 0; +} + +/* Everything is done in terms of 64bit time values to keep the code readable */ + +static inline void timespec_to_kernel_timespec(const struct timespec *ts, + struct __kernel_timespec *kts) +{ + if (!kts) + return; + + kts->tv_sec = ts->tv_sec; + kts->tv_nsec = ts->tv_nsec; +} + +static inline void kernel_timespec_to_timespec(const struct __kernel_timespec *kts, + struct timespec *ts) +{ + if (!kts) + return; + + ts->tv_sec = kts->tv_sec; + ts->tv_nsec = kts->tv_nsec; +} + +static int sys_clock_getres_time64(__kernel_clockid_t clockid, struct __kernel_timespec *ts) +{ +#if defined(__NR_clock_getres_time64) + return syscall(__NR_clock_getres_time64, clockid, ts); +#elif defined(__NR_clock_getres) + struct timespec _ts; + int ret; + + ret = syscall(__NR_clock_getres, clockid, &_ts); + if (!ret) + timespec_to_kernel_timespec(&_ts, ts); + return ret; +#else +#error "No clock_getres() support" +#endif +} + +static int sys_clock_gettime64(__kernel_clockid_t clockid, struct __kernel_timespec *ts) +{ +#if defined(__NR_clock_gettime64) + return syscall(__NR_clock_gettime64, clockid, ts); +#elif defined(__NR_clock_gettime) + struct timespec _ts; + int ret; + + ret = syscall(__NR_clock_gettime, clockid, &_ts); + if (!ret) + timespec_to_kernel_timespec(&_ts, ts); + return ret; +#else +#error "No clock_gettime() support" +#endif +} + +static int sys_clock_settime64(__kernel_clockid_t clockid, const struct __kernel_timespec *ts) +{ +#if defined(__NR_clock_settime64) + return syscall(__NR_clock_settime64, clockid, ts); +#elif defined(__NR_clock_settime) + struct timespec _ts; + + kernel_timespec_to_timespec(ts, &_ts); + return syscall(__NR_clock_settime, clockid, &_ts); +#else +#error "No clock_settime() support" +#endif +} + +static int sys_clock_adjtime64(__kernel_clockid_t clockid, struct __kernel_timex *tx) +{ +#if defined(__NR_clock_adjtime64) + return syscall(__NR_clock_adjtime64, clockid, tx); +#elif __LONG_WIDTH__ == 64 && defined(__NR_clock_adjtime) + return syscall(__NR_clock_adjtime, clockid, tx); +#else +#error "No clock_adjtime() support" 
+#endif +} + +FIXTURE(auxclock) {}; + +FIXTURE_VARIANT(auxclock) { + __kernel_clockid_t clock; + bool clock_enabled; + bool use_timens; +}; + +FIXTURE_VARIANT_ADD(auxclock, default) { + .clock = CLOCK_AUX, + .clock_enabled = true, + .use_timens = false, +}; + +FIXTURE_VARIANT_ADD(auxclock, timens) { + .clock = CLOCK_AUX, + .clock_enabled = true, + .use_timens = true, +}; + +FIXTURE_VARIANT_ADD(auxclock, disabled) { + .clock = CLOCK_AUX, + .clock_enabled = false, + .use_timens = false, +}; + +/* No timens_disabled to keep the testmatrix smaller. */ + +static void enter_timens(struct __test_metadata *_metadata) +{ + int ret, fd; + char buf[100]; + + ret = unshare(CLONE_NEWTIME); + if (ret != 0 && errno == EPERM) + SKIP(return, "no permissions for unshare(CLONE_NEWTIME)"); + if (ret != 0 && errno == EINVAL) + SKIP(return, "time namespaces not available"); + ASSERT_EQ(0, ret) TH_LOG("unshare(CLONE_NEWTIME) failed: %s", strerror(errno)); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd == -1 && errno == ENOENT) + SKIP(return, "no support for time namespaces"); + ASSERT_NE(-1, fd); + /* Fiddle with the namespace to make the tests more meaningful */ + ret = snprintf(buf, sizeof(buf), "monotonic %d 0\nboottime %d 0\n", + AUXCLOCK_SELFTEST_TIMENS_OFFSET, AUXCLOCK_SELFTEST_TIMENS_OFFSET); + ASSERT_TRUE(ret > 0 && ret < sizeof(buf)); + ret = write(fd, buf, ret); + ASSERT_NE(-1, ret); + close(fd); + fd = open("/proc/self/ns/time_for_children", O_RDONLY); + ASSERT_NE(-1, fd); + ret = setns(fd, CLONE_NEWTIME); + close(fd); + ASSERT_EQ(0, ret); +} + +FIXTURE_SETUP(auxclock) { + int ret; + + ret = configure_auxclock(variant->clock, variant->clock_enabled); + if (ret == -ENOENT) + SKIP(return, "auxclocks not enabled"); + ASSERT_EQ(0, ret); + + if (variant->use_timens) + enter_timens(_metadata); +} + +FIXTURE_TEARDOWN(auxclock) { + int ret; + + ret = configure_auxclock(variant->clock, false); + ASSERT_EQ(0, ret); +} + +TEST_F(auxclock, sys_clock_getres) { + struct __kernel_timespec ts; + int ret; + + /* clock_getres() is always expected to work */ + ret = sys_clock_getres_time64(variant->clock, &ts); + ASSERT_EQ(0, ret); + ASSERT_EQ(0, ts.tv_sec); + ASSERT_EQ(1, ts.tv_nsec); +} + +TEST_F(auxclock, sys_clock_gettime) { + struct __kernel_timespec ts; + int ret; + + ret = sys_clock_gettime64(variant->clock, &ts); + if (variant->clock_enabled) { + ASSERT_EQ(0, ret); + } else { + ASSERT_EQ(-1, ret); + ASSERT_EQ(ENODEV, errno); + } +} + +static void auxclock_validate_progression(struct __test_metadata *_metadata, + const struct __kernel_timespec *a, + const struct __kernel_timespec *b) +{ + int64_t diff; + + diff = (b->tv_sec - a->tv_sec) * NSEC_PER_SEC; + diff += b->tv_nsec - a->tv_nsec; + + /* Arbitrary values */ + ASSERT_LT(1, diff); + ASSERT_GT(1 * NSEC_PER_SEC, diff); +} + +TEST_F(auxclock, sys_clock_settime) { + struct __kernel_timespec a, b = {}; + int ret; + + a.tv_sec = 1234; + a.tv_nsec = 5678; + + ret = sys_clock_settime64(variant->clock, &a); + if (!variant->clock_enabled) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(ENODEV, errno); + return; + } + + ASSERT_EQ(0, ret); + + ret = sys_clock_gettime64(variant->clock, &b); + ASSERT_EQ(0, ret); + + auxclock_validate_progression(_metadata, &a, &b); +} + +TEST_F(auxclock, sys_clock_adjtime) { + struct __kernel_timex tx; + int ret, realtime_freq; + + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + ret = sys_clock_adjtime64(CLOCK_REALTIME, &tx); + ASSERT_NE(-1, ret); + ASSERT_TRUE(tx.modes & ADJ_FREQUENCY); + realtime_freq = tx.freq; + + ret = 
sys_clock_adjtime64(variant->clock, &tx); + if (variant->clock_enabled) { + ASSERT_NE(-1, ret); + ASSERT_EQ(realtime_freq, tx.freq); + } else { + ASSERT_EQ(-1, ret); + ASSERT_EQ(ENODEV, errno); + } +} + +TEST_F(auxclock, progression) { + struct __kernel_timespec a, b; + int ret; + + if (!variant->clock_enabled) { + TH_LOG("no progression on disabled clocks"); + return; + } + + /* set up reference */ + ret = sys_clock_gettime64(variant->clock, &a); + ASSERT_EQ(0, ret); + + for (int i = 0; i < 100; i++) { + memset(&b, 0, sizeof(b)); + ret = sys_clock_gettime64(variant->clock, &b); + ASSERT_EQ(0, ret); + auxclock_validate_progression(_metadata, &a, &b); + + memset(&a, 0, sizeof(a)); + ret = sys_clock_gettime64(variant->clock, &a); + ASSERT_EQ(0, ret); + auxclock_validate_progression(_metadata, &b, &a); + } +} + +TEST_HARNESS_MAIN
The logic to configure a 'struct vdso_clock' from a 'struct tk_read_base' is copied twice. Split it into a shared function to reduce the duplication, especially as another user will be added for auxiliary clocks.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 kernel/time/vsyscall.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 32ef27c71b57aaa4ed898c0dbfdefcb73b8ff56a..d655df2597336f7305bfc74e2a87c651f314267b 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -15,26 +15,25 @@
 
 #include "timekeeping_internal.h"
 
+static inline void fill_clock_configuration(struct vdso_clock *vc, const struct tk_read_base *base)
+{
+	vc->cycle_last = base->cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+	vc->max_cycles = base->clock->max_cycles;
+#endif
+	vc->mask = base->mask;
+	vc->mult = base->mult;
+	vc->shift = base->shift;
+}
+
 static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk)
 {
 	struct vdso_clock *vc = vdata->clock_data;
 	struct vdso_timestamp *vdso_ts;
 	u64 nsec, sec;
 
-	vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
-#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
-	vc[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles;
-#endif
-	vc[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
-	vc[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
-	vc[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
-	vc[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
-#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
-	vc[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles;
-#endif
-	vc[CS_RAW].mask = tk->tkr_raw.mask;
-	vc[CS_RAW].mult = tk->tkr_raw.mult;
-	vc[CS_RAW].shift = tk->tkr_raw.shift;
+	fill_clock_configuration(&vc[CS_HRES_COARSE], &tk->tkr_mono);
+	fill_clock_configuration(&vc[CS_RAW], &tk->tkr_raw);
 
 	/* CLOCK_MONOTONIC */
 	vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
The upcoming auxiliary clocks need this hook, too. To separate the architecture hooks from the timekeeper internals, refactor the hook to only operate on a single vDSO clock.
While at it, use a more robust #define for the hook override: the architecture implementation now carries the same name as the generic hook and defines the macro to itself, so the #ifndef-based override detection cannot get out of sync with the function it refers to.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 arch/arm64/include/asm/vdso/vsyscall.h | 7 +++----
 include/asm-generic/vdso/vsyscall.h    | 6 +++---
 kernel/time/vsyscall.c                 | 3 ++-
 3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index de58951b8df6a4bb9afd411878793c79c30adbf2..417aae5763a86f39acffe1e6f96cb0e57212d04b 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -13,12 +13,11 @@
  * Update the vDSO data page to keep in sync with kernel timekeeping.
  */
 static __always_inline
-void __arm64_update_vsyscall(struct vdso_time_data *vdata)
+void __arch_update_vdso_clock(struct vdso_clock *vc)
 {
-	vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
-	vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK;
+	vc->mask = VDSO_PRECISION_MASK;
 }
-#define __arch_update_vsyscall __arm64_update_vsyscall
+#define __arch_update_vdso_clock __arch_update_vdso_clock
 
 /* The asm-generic header needs to be included after the definitions above */
 #include <asm-generic/vdso/vsyscall.h>
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index b550afa15ecd101d821f51ce9105903978dced40..7fc0b560007dd8f09a2f24ace76ce68579ad17c1 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo
 
 #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
 
-#ifndef __arch_update_vsyscall
-static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata)
+#ifndef __arch_update_vdso_clock
+static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc)
 {
 }
-#endif /* __arch_update_vsyscall */
+#endif /* __arch_update_vdso_clock */
 
 #ifndef __arch_sync_vdso_time_data
 static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata)
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index d655df2597336f7305bfc74e2a87c651f314267b..df6bada2d58ed9a03e5dd3cb4b218983089a2877 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -118,7 +118,8 @@ void update_vsyscall(struct timekeeper *tk)
 	if (clock_mode != VDSO_CLOCKMODE_NONE)
 		update_vdso_time_data(vdata, tk);
 
-	__arch_update_vsyscall(vdata);
+	__arch_update_vdso_clock(&vc[CS_HRES_COARSE]);
+	__arch_update_vdso_clock(&vc[CS_RAW]);
 
 	vdso_write_end(vdata);
Auxiliary clocks will have their vDSO data in a dedicated 'struct vdso_clock', which needs to be synchronized independently.
Add a helper to synchronize a single vDSO clock.
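For reference, a minimal sketch (not part of this patch) of how a writer that updates exactly one vdso_clock is expected to use the new helpers; it mirrors vdso_time_update_aux() added later in this series. Passing true on both sides keeps the smp_wmb() that orders the seqcount increments against the data update:

static void update_one_vdso_clock(struct vdso_clock *vc)
{
	vdso_write_begin_clock(vc, true);	/* seq becomes odd, readers retry */

	/* ... update vc->clock_mode, vc->cycle_last, vc->basetime[], ... */

	vdso_write_end_clock(vc, true);		/* seq becomes even, data consistent again */
}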
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 include/vdso/helpers.h | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)
diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h
index 0a98fed550ba66a84a620fbbd6aee3e3029b4772..a5679f5efdfdcaaf6efd5f4a317d1f132c3dc617 100644
--- a/include/vdso/helpers.h
+++ b/include/vdso/helpers.h
@@ -28,32 +28,46 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
 	return seq != start;
 }
 
-static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
+static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc, bool last)
 {
-	struct vdso_clock *vc = vd->clock_data;
-
 	/*
 	 * WRITE_ONCE() is required otherwise the compiler can validly tear
-	 * updates to vd[x].seq and it is possible that the value seen by the
+	 * updates to vc->seq and it is possible that the value seen by the
 	 * reader is inconsistent.
 	 */
-	WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
-	WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
-	smp_wmb();
+	WRITE_ONCE(vc->seq, vc->seq + 1);
+
+	if (last)
+		smp_wmb();
 }
 
-static __always_inline void vdso_write_end(struct vdso_time_data *vd)
+static __always_inline void vdso_write_end_clock(struct vdso_clock *vc, bool first)
 {
-	struct vdso_clock *vc = vd->clock_data;
+	if (first)
+		smp_wmb();
 
-	smp_wmb();
 	/*
 	 * WRITE_ONCE() is required otherwise the compiler can validly tear
-	 * updates to vd[x].seq and it is possible that the value seen by the
+	 * updates to vc->seq and it is possible that the value seen by the
 	 * reader is inconsistent.
 	 */
-	WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
-	WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
+	WRITE_ONCE(vc->seq, vc->seq + 1);
+}
+
+static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
+{
+	struct vdso_clock *vc = vd->clock_data;
+
+	vdso_write_begin_clock(&vc[CS_HRES_COARSE], false);
+	vdso_write_begin_clock(&vc[CS_RAW], true);
+}
+
+static __always_inline void vdso_write_end(struct vdso_time_data *vd)
+{
+	struct vdso_clock *vc = vd->clock_data;
+
+	vdso_write_end_clock(&vc[CS_HRES_COARSE], true);
+	vdso_write_end_clock(&vc[CS_RAW], false);
 }
 
 #endif /* !__ASSEMBLY__ */
The internal helpers effectively return boolean results while pretending to return error numbers.

Switch the return type to bool for clarity.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 lib/vdso/gettimeofday.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 93ef801a97ef25f66195490d14e41bebcd41982b..9b77f23566f6a35887d4c9aaefc61a971131b499 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -396,8 +396,8 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time
#ifdef VDSO_HAS_CLOCK_GETRES static __maybe_unused -int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, - struct __kernel_timespec *res) +bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, + struct __kernel_timespec *res) { const struct vdso_clock *vc = vd->clock_data; u32 msk; @@ -405,7 +405,7 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock
/* Check for negative values or invalid clocks */ if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + return false;
if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) @@ -427,23 +427,25 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock */ ns = LOW_RES_NSEC; } else { - return -1; + return false; }
if (likely(res)) { res->tv_sec = 0; res->tv_nsec = ns; } - return 0; + return true; }
static __maybe_unused int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { - int ret = __cvdso_clock_getres_common(vd, clock, res); + bool ok;
- if (unlikely(ret)) + ok = __cvdso_clock_getres_common(vd, clock, res); + + if (unlikely(!ok)) return clock_getres_fallback(clock, res); return 0; } @@ -460,18 +462,18 @@ __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t cloc struct old_timespec32 *res) { struct __kernel_timespec ts; - int ret; + bool ok;
- ret = __cvdso_clock_getres_common(vd, clock, &ts); + ok = __cvdso_clock_getres_common(vd, clock, &ts);
- if (unlikely(ret)) + if (unlikely(!ok)) return clock_getres32_fallback(clock, res);
if (likely(res)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } - return ret; + return 0; }
static __maybe_unused int
The internal helpers effectively return boolean results while pretending to return error numbers.

Switch the return type to bool for clarity.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 lib/vdso/gettimeofday.c | 58 +++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 28 deletions(-)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 9b77f23566f6a35887d4c9aaefc61a971131b499..c5266532a097c06f33d12e345c695357d75abf42 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -82,8 +82,8 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; @@ -103,11 +103,11 @@ int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *v seq = vdso_read_begin(vc);
if (unlikely(!vdso_clocksource_ok(vc))) - return -1; + return false;
cycles = __arch_get_hw_counter(vc->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) - return -1; + return false; ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vc, seq))); @@ -123,7 +123,7 @@ int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *v ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ts->tv_nsec = ns;
- return 0; + return true; } #else static __always_inline @@ -133,16 +133,16 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim }
static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -EINVAL; + return false; } #endif
static __always_inline -int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u64 cycles, sec, ns; @@ -150,7 +150,7 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
/* Allows to compile the high resolution parts out */ if (!__arch_vdso_hres_capable()) - return -1; + return false;
do { /* @@ -173,11 +173,11 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, smp_rmb();
if (unlikely(!vdso_clocksource_ok(vc))) - return -1; + return false;
cycles = __arch_get_hw_counter(vc->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) - return -1; + return false; ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vc, seq))); @@ -189,13 +189,13 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ts->tv_nsec = ns;
- return 0; + return true; }
#ifdef CONFIG_TIME_NS static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; @@ -223,20 +223,20 @@ int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock */ ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); ts->tv_nsec = nsec; - return 0; + return true; } #else static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -1; + return false; } #endif
static __always_inline -int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u32 seq; @@ -258,10 +258,10 @@ int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, ts->tv_nsec = vdso_ts->nsec; } while (unlikely(vdso_read_retry(vc, seq)));
- return 0; + return true; }
-static __always_inline int +static __always_inline bool __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { @@ -270,7 +270,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
/* Check for negative values or invalid clocks */ if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + return false;
/* * Convert the clockid to a bitmask and use it to check which @@ -284,7 +284,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, else if (msk & VDSO_RAW) vc = &vc[CS_RAW]; else - return -1; + return false;
return do_hres(vd, vc, clock, ts); } @@ -293,9 +293,11 @@ static __maybe_unused int __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { - int ret = __cvdso_clock_gettime_common(vd, clock, ts); + bool ok;
- if (unlikely(ret)) + ok = __cvdso_clock_gettime_common(vd, clock, ts); + + if (unlikely(!ok)) return clock_gettime_fallback(clock, ts); return 0; }
With the upcoming addition of auxiliary clocks, the clockid validation will become more complicated. Split it into a dedicated function to keep the code readable.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 lib/vdso/gettimeofday.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index c5266532a097c06f33d12e345c695357d75abf42..215151bd5a1320ee6edda8f334d47c739577f696 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -71,6 +71,12 @@ static inline bool vdso_cycles_ok(u64 cycles)
 }
 #endif
 
+static __always_inline bool vdso_clockid_valid(clockid_t clock)
+{
+	/* Check for negative values or invalid clocks */
+	return likely((u32) clock < MAX_CLOCKS);
+}
+
 #ifdef CONFIG_TIME_NS
 
 #ifdef CONFIG_GENERIC_VDSO_DATA_STORE
@@ -268,8 +274,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
 	const struct vdso_clock *vc = vd->clock_data;
 	u32 msk;
 
-	/* Check for negative values or invalid clocks */
-	if (unlikely((u32) clock >= MAX_CLOCKS))
+	if (!vdso_clockid_valid(clock))
 		return false;
 
 	/*
@@ -405,8 +410,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
 	u32 msk;
 	u64 ns;
 
-	/* Check for negative values or invalid clocks */
-	if (unlikely((u32) clock >= MAX_CLOCKS))
+	if (!vdso_clockid_valid(clock))
 		return false;
 
 	if (IS_ENABLED(CONFIG_TIME_NS) &&
This code is duplicated, and with the introduction of auxiliary clocks it will be duplicated even more.
Introduce a helper.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 lib/vdso/gettimeofday.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 215151bd5a1320ee6edda8f334d47c739577f696..50611ba28abfcfc7841616e8787101ea1ffcb7d8 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -77,6 +77,16 @@ static __always_inline bool vdso_clockid_valid(clockid_t clock) return likely((u32) clock < MAX_CLOCKS); }
+/* + * Must not be invoked within the sequence read section as a race inside + * that loop could result in __iter_div_u64_rem() being extremely slow. + */ +static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 sec, u64 ns) +{ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; +} + #ifdef CONFIG_TIME_NS
#ifdef CONFIG_GENERIC_VDSO_DATA_STORE @@ -122,12 +132,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * sec += offs->sec; ns += offs->nsec;
- /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns);
return true; } @@ -188,12 +193,7 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vc, seq)));
- /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns);
return true; } @@ -223,12 +223,8 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock sec += offs->sec; nsec += offs->nsec;
- /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); - ts->tv_nsec = nsec; + vdso_set_timespec(ts, sec, nsec); + return true; } #else
This code is duplicated, and with the introduction of auxiliary clocks it will be duplicated even more.
Introduce a helper.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 lib/vdso/gettimeofday.c | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 50611ba28abfcfc7841616e8787101ea1ffcb7d8..c383878bb5445a62d8fea6591e8550183852c2e6 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -87,6 +87,26 @@ static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 ts->tv_nsec = ns; }
+static __always_inline +bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock *vc, + unsigned int clkidx, u64 *sec, u64 *ns) +{ + const struct vdso_timestamp *vdso_ts = &vc->basetime[clkidx]; + u64 cycles; + + if (unlikely(!vdso_clocksource_ok(vc))) + return false; + + cycles = __arch_get_hw_counter(vc->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(cycles))) + return false; + + *ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); + *sec = vdso_ts->sec; + + return true; +} + #ifdef CONFIG_TIME_NS
#ifdef CONFIG_GENERIC_VDSO_DATA_STORE @@ -104,28 +124,20 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; const struct vdso_clock *vc = vd->clock_data; - const struct vdso_timestamp *vdso_ts; - u64 cycles, ns; u32 seq; s64 sec; + u64 ns;
if (clk != CLOCK_MONOTONIC_RAW) vc = &vc[CS_HRES_COARSE]; else vc = &vc[CS_RAW]; - vdso_ts = &vc->basetime[clk];
do { seq = vdso_read_begin(vc);
- if (unlikely(!vdso_clocksource_ok(vc))) - return false; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vc, seq)));
/* Add the namespace offset */ @@ -155,8 +167,7 @@ static __always_inline bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; - u64 cycles, sec, ns; + u64 sec, ns; u32 seq;
/* Allows to compile the high resolution parts out */ @@ -183,14 +194,8 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, } smp_rmb();
- if (unlikely(!vdso_clocksource_ok(vc))) + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return false; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vc, seq)));
vdso_set_timespec(ts, sec, ns);
Move the constant auxiliary clock resolution to a shared header, so the vDSO can use it and return it without going through a syscall.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 include/vdso/auxclock.h   | 13 +++++++++++++
 kernel/time/timekeeping.c |  6 ++++--
 2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d6e74cbc400e7f5cd7194280759f5e5f3059900
--- /dev/null
+++ b/include/vdso/auxclock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _VDSO_AUXCLOCK_H
+#define _VDSO_AUXCLOCK_H
+
+#include <uapi/linux/time.h>
+#include <uapi/linux/types.h>
+
+static __always_inline u64 aux_clock_resolution_ns(void)
+{
+	return 1;
+}
+
+#endif /* _VDSO_AUXCLOCK_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6a61887eb87e364b3ef170939919a8d4046c0508..6a088378df54d561ecfeafb45a8b54333e11a9a7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -26,6 +26,8 @@
 #include <linux/audit.h>
 #include <linux/random.h>
 
+#include <vdso/auxclock.h>
+
 #include "tick-internal.h"
 #include "ntp_internal.h"
 #include "timekeeping_internal.h"
@@ -2842,8 +2844,8 @@ static int aux_get_res(clockid_t id, struct timespec64 *tp)
 	if (!clockid_aux_valid(id))
 		return -ENODEV;
 
-	tp->tv_sec = 0;
-	tp->tv_nsec = 1;
+	tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC;
+	tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC;
 	return 0;
 }
Expose the auxiliary clock data so it can be read from the vDSO.
Architectures not using the generic vDSO time framework, namely SPARC64, are not supported.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 include/linux/timekeeper_internal.h | 13 ++++++++++++
 include/vdso/datapage.h             |  3 +++
 kernel/time/namespace.c             |  5 +++++
 kernel/time/timekeeping.c           | 12 +++++++++++
 kernel/time/vsyscall.c              | 40 +++++++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index ca79938b62f396afd503e0cf06bbecf6f95cfce4..5de2e019399c72246f7d7dfad561c8b3d479dbd1 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -190,4 +190,17 @@ static inline void update_vsyscall_tz(void) } #endif
+#if defined(CONFIG_GENERIC_TIME_VSYSCALL) && defined(CONFIG_GENERIC_GETTIMEOFDAY) && \ + defined(CONFIG_POSIX_AUX_CLOCKS) + +extern void vdso_time_update_aux(struct timekeeper *tk); + +#else + +static inline void vdso_time_update_aux(struct timekeeper *tk) +{ +} + +#endif /* CONFIG_GENERIC_TIME_VSYSCALL && CONFIG_POSIX_AUX_CLOCKS */ + #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 1864e76e8f691bab10813543880f71bc59afa9c0..f4c96d9ce674abb07ccd8703f1a04da7631c1677 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -38,6 +38,7 @@ struct vdso_arch_data { #endif
#define VDSO_BASES (CLOCK_TAI + 1) +#define VDSO_BASE_AUX 0 #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ BIT(CLOCK_BOOTTIME) | \ @@ -117,6 +118,7 @@ struct vdso_clock { * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @clock_data: clocksource related data (array) + * @aux_clock_data: auxiliary clocksource related data (array) * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution @@ -133,6 +135,7 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data;
struct vdso_clock clock_data[CS_BASES]; + struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS];
s32 tz_minuteswest; s32 tz_dsttime; diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index e3642278df433c41654ffb6a8043c3fcecc2994a..667452768ed3b50e48e3cfb70f8ef68e4bed9e0b 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -242,6 +242,11 @@ static void timens_set_vvar_page(struct task_struct *task, for (i = 0; i < CS_BASES; i++) timens_setup_vdso_clock_data(&vc[i], ns);
+ if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { + for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) + timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); + } + out: mutex_unlock(&offset_lock); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6a088378df54d561ecfeafb45a8b54333e11a9a7..928b8e0773f9e1b8fd700f130a700c5908fe6c27 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -66,11 +66,21 @@ static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) { return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts); } + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST; +} #else static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) { return false; } + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return false; +} #endif
/* flag for if timekeeping is suspended */ @@ -719,6 +729,8 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + } else if (tk_is_aux(tk)) { + vdso_time_update_aux(tk); }
if (action & TK_CLOCK_WAS_SET) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index df6bada2d58ed9a03e5dd3cb4b218983089a2877..62d9701db9135ba4ef377f3f534a9279d7922d2d 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -136,6 +136,46 @@ void update_vsyscall_tz(void) __arch_sync_vdso_time_data(vdata); }
+#ifdef CONFIG_POSIX_AUX_CLOCKS +void vdso_time_update_aux(struct timekeeper *tk) +{ + struct vdso_time_data *vdata = vdso_k_time_data; + struct vdso_timestamp *vdso_ts; + struct vdso_clock *vc; + s32 clock_mode; + u64 nsec; + + vc = &vdata->aux_clock_data[tk->id - TIMEKEEPER_AUX_FIRST]; + vdso_ts = &vc->basetime[VDSO_BASE_AUX]; + clock_mode = tk->tkr_mono.clock->vdso_clock_mode; + if (!tk->clock_valid) + clock_mode = VDSO_CLOCKMODE_NONE; + + /* copy vsyscall data */ + vdso_write_begin_clock(vc, true); + + vc->clock_mode = clock_mode; + + if (clock_mode != VDSO_CLOCKMODE_NONE) { + fill_clock_configuration(vc, &tk->tkr_mono); + + vdso_ts->sec = tk->xtime_sec; + + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec += tk->offs_aux; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); + nsec = nsec << tk->tkr_mono.shift; + vdso_ts->nsec = nsec; + } + + __arch_update_vdso_clock(vc); + + vdso_write_end_clock(vc, true); + + __arch_sync_vdso_time_data(vdata); +} +#endif + /** * vdso_update_begin - Start of a VDSO update section *
Expose the auxiliary clocks through the vDSO.
Architectures not using the generic vDSO time framework, namely SPARC64, are not supported.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 include/vdso/datapage.h |  2 ++
 lib/vdso/gettimeofday.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index f4c96d9ce674abb07ccd8703f1a04da7631c1677..02533038640e53c40291c7e09139e0f9b32f502a 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -5,6 +5,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <uapi/linux/bits.h>
 #include <uapi/linux/time.h>
 #include <uapi/linux/types.h>
 #include <uapi/asm-generic/errno-base.h>
@@ -46,6 +47,7 @@ struct vdso_arch_data {
 #define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE) | \
 			 BIT(CLOCK_MONOTONIC_COARSE))
 #define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
+#define VDSO_AUX	__GENMASK(CLOCK_AUX_LAST, CLOCK_AUX)
 
 #define CS_HRES_COARSE	0
 #define CS_RAW		1
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index c383878bb5445a62d8fea6591e8550183852c2e6..3a54dbb8fe32849cd749e71ec2a8be44baf42816 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -2,6 +2,7 @@
 /*
  * Generic userspace implementations of gettimeofday() and similar.
  */
+#include <vdso/auxclock.h>
 #include <vdso/datapage.h>
 #include <vdso/helpers.h>
 
@@ -74,7 +75,7 @@ static inline bool vdso_cycles_ok(u64 cycles)
 static __always_inline bool vdso_clockid_valid(clockid_t clock)
 {
 	/* Check for negative values or invalid clocks */
-	return likely((u32) clock < MAX_CLOCKS);
+	return likely((u32) clock < CLOCK_AUX_LAST);
 }
 
 /*
@@ -268,6 +269,48 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
 	return true;
 }
 
+static __always_inline
+bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts)
+{
+	const struct vdso_clock *vc;
+	u64 sec, ns;
+	u32 seq;
+	u8 idx;
+
+	if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
+		return false;
+
+	idx = clock - CLOCK_AUX;
+	vc = &vd->aux_clock_data[idx];
+
+	do {
+		/*
+		 * Open coded function vdso_read_begin() to handle
+		 * VDSO_CLOCK_TIMENS. See comment in do_hres().
+		 */
+		while ((seq = READ_ONCE(vc->seq)) & 1) {
+			if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
+				vd = __arch_get_vdso_u_timens_data(vd);
+				vc = &vd->aux_clock_data[idx];
+				break;
+			}
+			cpu_relax();
+		}
+		smp_rmb();
+
+		/* Auxclock disabled? */
+		if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
+			return false;
+
+		if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
+			return false;
+	} while (unlikely(vdso_read_retry(vc, seq)));
+
+	vdso_set_timespec(ts, sec, ns);
+
+	return true;
+}
+
 static __always_inline
 bool __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
 				  struct __kernel_timespec *ts)
@@ -289,6 +332,8 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
 		return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts);
 	else if (msk & VDSO_RAW)
 		vc = &vc[CS_RAW];
+	else if (msk & VDSO_AUX)
+		return do_aux(vd, clock, ts);
 	else
 		return false;
 
@@ -433,6 +478,8 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
 		 * Preserves the behaviour of posix_get_coarse_res().
 		 */
 		ns = LOW_RES_NSEC;
+	} else if (msk & VDSO_AUX) {
+		ns = aux_clock_resolution_ns();
 	} else {
 		return false;
 	}
This reverts commit c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers")
The kernel headers were used to make parse_vdso.c compatible with nolibc. Unfortunately linux/elf.h is incompatible with glibc's sys/auxv.h. When using glibc it is therefore not possible to build parse_vdso.c as part of the same compilation unit as its caller, because sys/auxv.h is needed for getauxval().
In the meantime nolibc gained its own elf.h, providing compatibility with the documented libc interfaces.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 tools/testing/selftests/vDSO/Makefile     | 2 --
 tools/testing/selftests/vDSO/parse_vdso.c | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 12a0614b9fd4983deffe5d6a7cfa06ba8d92a516..67ead1ba6cb9c6d6088680a9c4d24a9d19f7231f 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -20,8 +20,6 @@ endif
 
 include ../lib.mk
 
-CFLAGS += $(TOOLS_INCLUDES)
-
 CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \
 		 -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \
 		 -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 3ff00fb624a44b964cc54954f1f088cabe11a901..c6ff4413ea367ae57bc6a60073314b29f938c99d 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -19,8 +19,7 @@
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
-#include <linux/auxvec.h>
-#include <linux/elf.h>
+#include <elf.h>
 
 #include "parse_vdso.h"
Extend the auxclock test to also cover the vDSO.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
 tools/testing/selftests/timers/auxclock.c | 95 +++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/timers/auxclock.c b/tools/testing/selftests/timers/auxclock.c index 0ba2f9996114ade3147f0f3aec49904556a23cd4..314037839c1c7dd32ca32722231c67bc408a2ea3 100644 --- a/tools/testing/selftests/timers/auxclock.c +++ b/tools/testing/selftests/timers/auxclock.c @@ -10,11 +10,16 @@ #include <linux/timex.h> #include <sched.h> #include <stdio.h> +#include <sys/auxv.h> #include <sys/syscall.h> #include <unistd.h>
#include "../kselftest_harness.h"
+#include "../vDSO/parse_vdso.c" +#include "../vDSO/vdso_config.h" +#include "../vDSO/vdso_call.h" + #ifndef CLOCK_AUX #define CLOCK_AUX 16 #endif @@ -133,7 +138,45 @@ static int sys_clock_adjtime64(__kernel_clockid_t clockid, struct __kernel_timex #endif }
-FIXTURE(auxclock) {}; +FIXTURE(auxclock) { + int (*vdso_clock_gettime)(__kernel_clockid_t clockid, struct timespec *ts); + int (*vdso_clock_gettime64)(__kernel_clockid_t clockid, struct __kernel_timespec *ts); + int (*vdso_clock_getres)(__kernel_clockid_t clockid, struct timespec *ts); +}; + +static int vdso_clock_gettime64(FIXTURE_DATA(auxclock) *self, __kernel_clockid_t clockid, + struct __kernel_timespec *ts) +{ + struct timespec _ts; + int ret; + + if (self->vdso_clock_gettime64) { + return VDSO_CALL(self->vdso_clock_gettime64, 2, clockid, ts); + } else if (self->vdso_clock_gettime) { + ret = VDSO_CALL(self->vdso_clock_gettime, 2, clockid, &_ts); + if (!ret) + timespec_to_kernel_timespec(&_ts, ts); + return ret; + } else { + return -ENOSYS; + } +} + +static int vdso_clock_getres_time64(FIXTURE_DATA(auxclock) *self, __kernel_clockid_t clockid, + struct __kernel_timespec *ts) +{ + struct timespec _ts; + int ret; + + if (self->vdso_clock_getres) { + ret = VDSO_CALL(self->vdso_clock_getres, 2, clockid, &_ts); + if (!ret) + timespec_to_kernel_timespec(&_ts, ts); + return ret; + } else { + return -ENOSYS; + } +}
FIXTURE_VARIANT(auxclock) { __kernel_clockid_t clock; @@ -193,6 +236,18 @@ static void enter_timens(struct __test_metadata *_metadata) FIXTURE_SETUP(auxclock) { int ret;
+#ifdef AT_SYSINFO_EHDR + unsigned long sysinfo_ehdr; + + sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (sysinfo_ehdr) + vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); + + self->vdso_clock_gettime = vdso_sym(versions[VDSO_VERSION], names[VDSO_NAMES][1]); + self->vdso_clock_gettime64 = vdso_sym(versions[VDSO_VERSION], names[VDSO_NAMES][5]); + self->vdso_clock_getres = vdso_sym(versions[VDSO_VERSION], names[VDSO_NAMES][3]); +#endif /* !AT_SYSINFO_EHDR */ + ret = configure_auxclock(variant->clock, variant->clock_enabled); if (ret == -ENOENT) SKIP(return, "auxclocks not enabled"); @@ -220,6 +275,20 @@ TEST_F(auxclock, sys_clock_getres) { ASSERT_EQ(1, ts.tv_nsec); }
+TEST_F(auxclock, vdso_clock_getres) { + struct __kernel_timespec ts; + int ret; + + ret = vdso_clock_getres_time64(self, variant->clock, &ts); + if (ret == -ENOSYS) { + SKIP(return, "no clock_getres() in vDSO"); + } else { + ASSERT_EQ(0, ret); + ASSERT_EQ(0, ts.tv_sec); + ASSERT_EQ(1, ts.tv_nsec); + } +} + TEST_F(auxclock, sys_clock_gettime) { struct __kernel_timespec ts; int ret; @@ -233,6 +302,20 @@ TEST_F(auxclock, sys_clock_gettime) { } }
+TEST_F(auxclock, vdso_clock_gettime) { + struct __kernel_timespec ts; + int ret; + + ret = vdso_clock_gettime64(self, variant->clock, &ts); + if (ret == -ENOSYS) { + SKIP(return, "no clock_gettime() in vDSO"); + } else if (variant->clock_enabled) { + ASSERT_EQ(0, ret); + } else { + ASSERT_EQ(-ENODEV, ret); + } +} + static void auxclock_validate_progression(struct __test_metadata *_metadata, const struct __kernel_timespec *a, const struct __kernel_timespec *b) @@ -310,9 +393,13 @@ TEST_F(auxclock, progression) { auxclock_validate_progression(_metadata, &a, &b);
memset(&a, 0, sizeof(a)); - ret = sys_clock_gettime64(variant->clock, &a); - ASSERT_EQ(0, ret); - auxclock_validate_progression(_metadata, &b, &a); + ret = vdso_clock_gettime64(self, variant->clock, &a); + if (ret == -ENOSYS) { + a = b; + } else { + ASSERT_EQ(0, ret); + auxclock_validate_progression(_metadata, &b, &a); + } } }