On Sun, 20 Oct 2019, Andy Lutomirski wrote:
> On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>> __arch_use_vsyscall() is a pointless exercise TBH. The VDSO data
>> should be updated unconditionally so all the trivial interfaces like
>> time() and getres() just work independently of the functions which
>> depend on the underlying clocksource.
>>
>> These functions have a fallback operation already:
>>
>> Let __arch_get_hw_counter() return U64_MAX and the syscall fallback
>> is invoked.
>
> My thought was that __arch_get_hw_counter() could return last-1 to
> indicate failure, which would allow the two checks to be folded into
> one. Or we could continue to use U64_MAX, rely on the fact that
> (s64)U64_MAX < 0, not worry about the cycle counter overflowing, and
> let cycles < last catch it.
This is not an overflow catch. It's solely there to deal with the fact
that on x86 you can observe (cycles < last) on multi-socket systems
under rare circumstances. No other architecture has that issue AFAIK.
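For reference, the x86 specific vdso_calc_delta() (removed in the
patch below) handles exactly that case by clamping to the base time of
the current conversion period instead of producing a huge delta:

    if (cycles > last)
            return (cycles - last) * mult;
    return 0;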
The wraparound of clocksources with a smaller width than 64bit is
handled by:

    delta = (cycles - last) & mask;

which operates on unsigned values for obvious reasons.
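A quick illustration with a hypothetical 32bit wide clocksource (made-up
numbers, not from any real driver): the unsigned subtraction plus mask
yields the correct forward delta even when the counter wraps between
the update of cycle_last and the read:

    u64 mask   = 0xffffffffULL;  /* hypothetical 32bit clocksource */
    u64 last   = 0xfffffff0ULL;  /* cycle_last, shortly before the wrap */
    u64 cycles = 0x00000010ULL;  /* counter read after the wrap */

    u64 delta = (cycles - last) & mask;  /* == 0x20, not a huge value */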
> (And we should change it to return s64 at some point regardless -- all
> the math is signed, so the underlying types should be too IMO.)
See above. delta is guaranteed to be >= 0, and the mult/shift
operations are unsigned as well. All the base values in the VDSO data
are unsigned too.
The weird typecast there:

    if ((s64)cycles < 0)

could as well be:

    if (cycles == U64_MAX)

but the typecast version creates better code.
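i.e. the check relies on no valid counter value ever having bit 63 set,
so the error value is the only one which goes negative when
reinterpreted. Annotated sketch of the existing check (comments mine):

    /*
     * U64_MAX is 0xffffffffffffffff, so bit 63 is set and the value
     * is negative when reinterpreted as s64. Valid cycle counts are
     * nowhere near 2^63, so only the error return trips this test.
     */
    if ((s64)cycles < 0)
            return -1;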
I tried to fold the two operations (see patch below), but on all
machines I tested (various generations of Intel and AMD) the result is
slower than what we have now by a couple of cycles, which is a lot for
these functions (between 3-5%). I'm sure I tried that before and ended
up with the existing code as the fastest variant.
Why? That's subject to speculation :)
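FWIW, a crude userspace harness like the one below (a sketch, not the
exact rig used for the numbers above) is enough to see per-call
differences of a couple of cycles when pinned to a single CPU:

    #include <stdio.h>
    #include <time.h>

    #define LOOPS 10000000L

    int main(void)
    {
            struct timespec ts, t0, t1;
            long i;

            clock_gettime(CLOCK_MONOTONIC, &t0);
            for (i = 0; i < LOOPS; i++)
                    clock_gettime(CLOCK_MONOTONIC, &ts); /* VDSO fast path */
            clock_gettime(CLOCK_MONOTONIC, &t1);

            printf("%.2f ns/call\n",
                   ((t1.tv_sec - t0.tv_sec) * 1e9 +
                    (t1.tv_nsec - t0.tv_nsec)) / LOOPS);
            return 0;
    }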
Thanks,
tglx
8<----------
 arch/x86/include/asm/vdso/gettimeofday.h |   39 ++++++-------------------
 lib/vdso/gettimeofday.c                  |   23 +++---------------
 2 files changed, 13 insertions(+), 49 deletions(-)

--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -235,10 +235,14 @@ static u64 vread_hvclock(void)
 }
 #endif
 
-static inline u64 __arch_get_hw_counter(s32 clock_mode)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, u64 last, u64 mask)
 {
+	/*
+	 * Mask operation is not required as all VDSO clocksources are
+	 * 64bit wide.
+	 */
 	if (clock_mode == VCLOCK_TSC)
-		return (u64)rdtsc_ordered();
+		return (u64)rdtsc_ordered() - last;
 	/*
 	 * For any memory-mapped vclock type, we need to make sure that gcc
 	 * doesn't cleverly hoist a load before the mode check. Otherwise we
@@ -248,13 +252,13 @@ static inline u64 __arch_get_hw_counter(
 #ifdef CONFIG_PARAVIRT_CLOCK
 	if (clock_mode == VCLOCK_PVCLOCK) {
 		barrier();
-		return vread_pvclock();
+		return vread_pvclock() - last;
 	}
 #endif
 #ifdef CONFIG_HYPERV_TIMER
 	if (clock_mode == VCLOCK_HVCLOCK) {
 		barrier();
-		return vread_hvclock();
+		return vread_hvclock() - last;
 	}
 #endif
 	return U64_MAX;
@@ -265,33 +269,6 @@ static __always_inline const struct vdso
 	return __vdso_data;
 }
 
-/*
- * x86 specific delta calculation.
- *
- * The regular implementation assumes that clocksource reads are globally
- * monotonic. The TSC can be slightly off across sockets which can cause
- * the regular delta calculation (@cycles - @last) to return a huge time
- * jump.
- *
- * Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
- *
- * This variant also removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	if (cycles > last)
-		return (cycles - last) * mult;
-	return 0;
-}
-#define vdso_calc_delta vdso_calc_delta
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -26,34 +26,21 @@
 #include <asm/vdso/gettimeofday.h>
 #endif /* ENABLE_COMPAT_VDSO */
 
-#ifndef vdso_calc_delta
-/*
- * Default implementation which works for all sane clocksources. That
- * obviously excludes x86/TSC.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	return ((cycles - last) & mask) * mult;
-}
-#endif
-
 static int do_hres(const struct vdso_data *vd, clockid_t clk,
 		   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
-	u64 cycles, last, sec, ns;
+	u64 delta, sec, ns;
 	u32 seq;
 
 	do {
 		seq = vdso_read_begin(vd);
-		cycles = __arch_get_hw_counter(vd->clock_mode);
-		ns = vdso_ts->nsec;
-		last = vd->cycle_last;
-		if (unlikely((s64)cycles < 0))
+		delta = __arch_get_hw_counter(vd->clock_mode, vd->cycle_last,
+					      vd->mask);
+		if (unlikely((s64)delta < 0))
 			return -1;
-		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+		ns = vdso_ts->nsec + delta * vd->mult;
 		ns >>= vd->shift;
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));