In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
To solve this problem, we use U64_MAX as the only "invalid" return value -- this is still not fully correct, but has no problem in most cases. Moreover, all vdso time-related functions should rely on the return value of __arch_use_vsyscall(), because update_vdso_data() and update_vsyscall_tz() also rely on it. So, in the core functions of __cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_ getres(), if __arch_use_vsyscall() returns false, we use the fallback functions directly.
Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation") Cc: stable@vger.kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Paul Burton paul.burton@mips.com Cc: linux-mips@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Huacai Chen chenhc@lemote.com --- arch/arm64/include/asm/vdso/vsyscall.h | 2 +- arch/mips/include/asm/vdso/vsyscall.h | 2 +- include/asm-generic/vdso/vsyscall.h | 2 +- lib/vdso/gettimeofday.c | 12 +++++++++++- 4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c731bf..406e6de 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -31,7 +31,7 @@ int __arm64_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __arm64_get_clock_mode
static __always_inline -int __arm64_use_vsyscall(struct vdso_data *vdata) +int __arm64_use_vsyscall(const struct vdso_data *vdata) { return !vdata[CS_HRES_COARSE].clock_mode; } diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 1953147..8b10dd7 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -29,7 +29,7 @@ int __mips_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __mips_get_clock_mode
static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) +int __mips_use_vsyscall(const struct vdso_data *vdata) { return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); } diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index e94b1978..ac05a625 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -26,7 +26,7 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) #endif /* __arch_get_clock_mode */
#ifndef __arch_use_vsyscall -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata) +static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata) { return 1; } diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e630e7f..4ad062e 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -9,6 +9,7 @@ #include <linux/hrtimer_defs.h> #include <vdso/datapage.h> #include <vdso/helpers.h> +#include <vdso/vsyscall.h>
/* * The generic vDSO implementation requires that gettimeofday.h @@ -50,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last; - if (unlikely((s64)cycles < 0)) + if (unlikely(cycles == U64_MAX)) return -1;
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -91,6 +92,9 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
+ if (!__arch_use_vsyscall(vd)) + return -1; + /* * Convert the clockid to a bitmask and use it to check which * clocks are handled in the VDSO directly. @@ -145,6 +149,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { const struct vdso_data *vd = __arch_get_vdso_data();
+ if (!__arch_use_vsyscall(vd)) + return gettimeofday_fallback(tv, tz); + if (likely(tv != NULL)) { struct __kernel_timespec ts;
@@ -189,6 +196,9 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
+ if (!__arch_use_vsyscall(vd)) + return -1; + hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); /* * Convert the clockid to a bitmask and use it to check which
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen chenhc@lemote.com wrote:
In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
s/negtive/negative
What's the actual bug? Is it that MIPS is returning 0 but the check is < 0? Sounds like MIPS should get fixed.
To solve this problem, we use U64_MAX as the only "invalid" return value -- this is still not fully correct, but has no problem in most cases.
I'm sort of okay with that, but...
Moreover, all vdso time-related functions should rely on the return value of __arch_use_vsyscall(), because update_vdso_data() and update_vsyscall_tz() also rely on it. So, in the core functions of __cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_ getres(), if __arch_use_vsyscall() returns false, we use the fallback functions directly.
__arch_use_vsyscall() is not currently intended for use in the vDSO at all.
Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation") Cc: stable@vger.kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Paul Burton paul.burton@mips.com Cc: linux-mips@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Huacai Chen chenhc@lemote.com
arch/arm64/include/asm/vdso/vsyscall.h | 2 +- arch/mips/include/asm/vdso/vsyscall.h | 2 +- include/asm-generic/vdso/vsyscall.h | 2 +- lib/vdso/gettimeofday.c | 12 +++++++++++- 4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c731bf..406e6de 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -31,7 +31,7 @@ int __arm64_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __arm64_get_clock_mode
static __always_inline -int __arm64_use_vsyscall(struct vdso_data *vdata) +int __arm64_use_vsyscall(const struct vdso_data *vdata) { return !vdata[CS_HRES_COARSE].clock_mode; } diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 1953147..8b10dd7 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -29,7 +29,7 @@ int __mips_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __mips_get_clock_mode
static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) +int __mips_use_vsyscall(const struct vdso_data *vdata) { return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); } diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index e94b1978..ac05a625 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -26,7 +26,7 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) #endif /* __arch_get_clock_mode */
#ifndef __arch_use_vsyscall -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata) +static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata) { return 1; } diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e630e7f..4ad062e 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -9,6 +9,7 @@ #include <linux/hrtimer_defs.h> #include <vdso/datapage.h> #include <vdso/helpers.h> +#include <vdso/vsyscall.h>
/*
- The generic vDSO implementation requires that gettimeofday.h
@@ -50,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
if (unlikely(cycles == U64_MAX)) return -1;
I would actually prefer:
if (unlikely(cycles < last))
or perhaps:
if (unlikely((s64)(cycles-last) < 0))
which would have the nice side effect of getting rid of the annoying x86 special case in vdso_calc_delta(). The former version is compatible with U64_MAX, whereas the latter version would need the error case to return last-1 or similar. The benefit of the latter version is that it can survive wrap-around.
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -91,6 +92,9 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
if (!__arch_use_vsyscall(vd))
return -1;
NAK. I don't think this is helpful or correct. It doesn't appear to do anything valid, and it's racy.
/* * Convert the clockid to a bitmask and use it to check which * clocks are handled in the VDSO directly.
@@ -145,6 +149,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { const struct vdso_data *vd = __arch_get_vdso_data();
if (!__arch_use_vsyscall(vd))
return gettimeofday_fallback(tv, tz);
Ditto.
if (likely(tv != NULL)) { struct __kernel_timespec ts;
@@ -189,6 +196,9 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
if (!__arch_use_vsyscall(vd))
return -1;
Ditto.
Hi Andy and Hucan,
On 10/18/19 4:15 AM, Andy Lutomirski wrote:
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen chenhc@lemote.com wrote:
In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
s/negtive/negative
What's the actual bug? Is it that MIPS is returning 0 but the check is < 0? Sounds like MIPS should get fixed.
I submitted a patch for this yesterday to the MIPS maintainers [1]. The MIPS32 r1 implementation had a bug when VDSO_CLOCK_NONE was set.
The issue has been reported by Maxime Bizon who tested the fix as well.
[1] https://patchwork.kernel.org/patch/11193391/
Hi, Andy,
On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski luto@kernel.org wrote:
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen chenhc@lemote.com wrote:
In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
s/negtive/negative
What's the actual bug? Is it that MIPS is returning 0 but the check is < 0? Sounds like MIPS should get fixed.
My original bug is what Vincenzo said, MIPS has a boot failure if no valid clock_mode, and surely MIPS need to fix. However, when I try to fix it, I found that clock_getres() has another problem, because __cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but hrtimer_res is set in update_vdso_data() which relies on __arch_use_vsyscall().
To solve this problem, we use U64_MAX as the only "invalid" return value -- this is still not fully correct, but has no problem in most cases.
I'm sort of okay with that, but...
Moreover, all vdso time-related functions should rely on the return value of __arch_use_vsyscall(), because update_vdso_data() and update_vsyscall_tz() also rely on it. So, in the core functions of __cvdso_gettimeofday(), __cvdso_clock_gettime() and __cvdso_clock_ getres(), if __arch_use_vsyscall() returns false, we use the fallback functions directly.
__arch_use_vsyscall() is not currently intended for use in the vDSO at all.
Fixes: 00b26474c2f1613d7ab894c5 ("lib/vdso: Provide generic VDSO implementation") Cc: stable@vger.kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Paul Burton paul.burton@mips.com Cc: linux-mips@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Huacai Chen chenhc@lemote.com
arch/arm64/include/asm/vdso/vsyscall.h | 2 +- arch/mips/include/asm/vdso/vsyscall.h | 2 +- include/asm-generic/vdso/vsyscall.h | 2 +- lib/vdso/gettimeofday.c | 12 +++++++++++- 4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c731bf..406e6de 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -31,7 +31,7 @@ int __arm64_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __arm64_get_clock_mode
static __always_inline -int __arm64_use_vsyscall(struct vdso_data *vdata) +int __arm64_use_vsyscall(const struct vdso_data *vdata) { return !vdata[CS_HRES_COARSE].clock_mode; } diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 1953147..8b10dd7 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -29,7 +29,7 @@ int __mips_get_clock_mode(struct timekeeper *tk) #define __arch_get_clock_mode __mips_get_clock_mode
static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) +int __mips_use_vsyscall(const struct vdso_data *vdata) { return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); } diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index e94b1978..ac05a625 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -26,7 +26,7 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) #endif /* __arch_get_clock_mode */
#ifndef __arch_use_vsyscall -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata) +static __always_inline int __arch_use_vsyscall(const struct vdso_data *vdata) { return 1; } diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e630e7f..4ad062e 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -9,6 +9,7 @@ #include <linux/hrtimer_defs.h> #include <vdso/datapage.h> #include <vdso/helpers.h> +#include <vdso/vsyscall.h>
/*
- The generic vDSO implementation requires that gettimeofday.h
@@ -50,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
if (unlikely(cycles == U64_MAX)) return -1;
I would actually prefer:
if (unlikely(cycles < last))
or perhaps:
if (unlikely((s64)(cycles-last) < 0))
which would have the nice side effect of getting rid of the annoying x86 special case in vdso_calc_delta(). The former version is compatible with U64_MAX, whereas the latter version would need the error case to return last-1 or similar. The benefit of the latter version is that it can survive wrap-around.
When you say if (unlikely(cycles < last)), do you means if (unlikely(cycles <= last))? If __arch_get_hw_counter() return U64_MAX every time, I don't think cycles can be less than last.
Huacai
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -91,6 +92,9 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
if (!__arch_use_vsyscall(vd))
return -1;
NAK. I don't think this is helpful or correct. It doesn't appear to do anything valid, and it's racy.
/* * Convert the clockid to a bitmask and use it to check which * clocks are handled in the VDSO directly.
@@ -145,6 +149,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { const struct vdso_data *vd = __arch_get_vdso_data();
if (!__arch_use_vsyscall(vd))
return gettimeofday_fallback(tv, tz);
Ditto.
if (likely(tv != NULL)) { struct __kernel_timespec ts;
@@ -189,6 +196,9 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1;
if (!__arch_use_vsyscall(vd))
return -1;
Ditto.
On Sat, 19 Oct 2019, Huacai Chen wrote:
On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski luto@kernel.org wrote:
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen chenhc@lemote.com wrote:
In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
s/negtive/negative
What's the actual bug? Is it that MIPS is returning 0 but the check is < 0? Sounds like MIPS should get fixed.
My original bug is what Vincenzo said, MIPS has a boot failure if no valid clock_mode, and surely MIPS need to fix. However, when I try to fix it, I found that clock_getres() has another problem, because __cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but hrtimer_res is set in update_vdso_data() which relies on __arch_use_vsyscall().
__arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be updated unconditionally so all the trivial interfaces like time() and getres() just work independently of the functions which depend on the underlying clocksource.
This functions have a fallback operation already:
Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is invoked.
__arch_use_vsyscall() should just be removed.
Thanks,
tglx
On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner tglx@linutronix.de wrote:
On Sat, 19 Oct 2019, Huacai Chen wrote:
On Fri, Oct 18, 2019 at 11:15 AM Andy Lutomirski luto@kernel.org wrote:
On Thu, Oct 17, 2019 at 7:57 PM Huacai Chen chenhc@lemote.com wrote:
In do_hres(), we currently use whether the return value of __arch_get_ hw_counter() is negtive to indicate fallback, but this is not a good idea. Because:
1, ARM64 returns ULL_MAX but MIPS returns 0 when clock_mode is invalid; 2, For a 64bit counter, a "negtive" value of counter is actually valid.
s/negtive/negative
What's the actual bug? Is it that MIPS is returning 0 but the check is < 0? Sounds like MIPS should get fixed.
My original bug is what Vincenzo said, MIPS has a boot failure if no valid clock_mode, and surely MIPS need to fix. However, when I try to fix it, I found that clock_getres() has another problem, because __cvdso_clock_getres_common() get vd[CS_HRES_COARSE].hrtimer_res, but hrtimer_res is set in update_vdso_data() which relies on __arch_use_vsyscall().
__arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be updated unconditionally so all the trivial interfaces like time() and getres() just work independently of the functions which depend on the underlying clocksource.
This functions have a fallback operation already:
Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is invoked.
My thought was that __arch_get_hw_counter() could return last-1 to indicate failure, which would allow the two checks to be folded into one check. Or we could continue to use U64_MAX and rely on the fact that (s64)U64_MAX < 0, not worry about the cycle counter overflowing, and letting cycles < last catch it.
(And we should change it to return s64 at some point regardless -- all the math is signed, so the underlying types should be too IMO.)
On Sun, 20 Oct 2019, Andy Lutomirski wrote:
On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner tglx@linutronix.de wrote:
__arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be updated unconditionally so all the trivial interfaces like time() and getres() just work independently of the functions which depend on the underlying clocksource.
This functions have a fallback operation already:
Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is invoked.
My thought was that __arch_get_hw_counter() could return last-1 to indicate failure, which would allow the two checks to be folded into one check. Or we could continue to use U64_MAX and rely on the fact that (s64)U64_MAX < 0, not worry about the cycle counter overflowing, and letting cycles < last catch it.
This is not an overflow catch. It's solely to deal with the fact that on X86 you can observe (cycles < last) on multi socket systems under rare circumstances. Any other architecture does not have that issue AFAIK.
The wraparound of clocksources with a smaller width than 64bit is handled by:
delta = (cycles - last) & mask;
which operates on unsigned values for obvious reasons.
(And we should change it to return s64 at some point regardless -- all the math is signed, so the underlying types should be too IMO.)
See above. delta is guaranteed to be >= 0 and the mult/shift is not signed either. All the base values which are in the VDSO are unsigned as well.
The weird typecast there:
if ((s64)cycles < 0)
could as well be
if (cycles == U64_MAX)
but the typecasted version creates better code.
I tried to fold the two operations (see patch below) and on all machines I tested on (various generations of Intel and AMD) the result is slower than what we have now by a couple of cycles, which is a lot for these functions (i.e. between 3-5%). I'm sure I tried that before and ended up with the existing code as the fastest variant.
Why? That's subject to speculation :)
Thanks,
tglx
8<---------- arch/x86/include/asm/vdso/gettimeofday.h | 39 ++++++------------------------- lib/vdso/gettimeofday.c | 23 +++--------------- 2 files changed, 13 insertions(+), 49 deletions(-)
--- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -235,10 +235,14 @@ static u64 vread_hvclock(void) } #endif
-static inline u64 __arch_get_hw_counter(s32 clock_mode) +static inline u64 __arch_get_hw_counter(s32 clock_mode, u64 last, u64 mask) { + /* + * Mask operation is not required as all VDSO clocksources are + * 64bit wide. + */ if (clock_mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); + return (u64)rdtsc_ordered() - last; /* * For any memory-mapped vclock type, we need to make sure that gcc * doesn't cleverly hoist a load before the mode check. Otherwise we @@ -248,13 +252,13 @@ static inline u64 __arch_get_hw_counter( #ifdef CONFIG_PARAVIRT_CLOCK if (clock_mode == VCLOCK_PVCLOCK) { barrier(); - return vread_pvclock(); + return vread_pvclock() - last; } #endif #ifdef CONFIG_HYPERV_TIMER if (clock_mode == VCLOCK_HVCLOCK) { barrier(); - return vread_hvclock(); + return vread_hvclock() - last; } #endif return U64_MAX; @@ -265,33 +269,6 @@ static __always_inline const struct vdso return __vdso_data; }
-/* - * x86 specific delta calculation. - * - * The regular implementation assumes that clocksource reads are globally - * monotonic. The TSC can be slightly off across sockets which can cause - * the regular delta calculation (@cycles - @last) to return a huge time - * jump. - * - * Therefore it needs to be verified that @cycles are greater than - * @last. If not then use @last, which is the base time of the current - * conversion period. - * - * This variant also removes the masking of the subtraction because the - * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX - * which would result in a pointless operation. The compiler cannot - * optimize it away as the mask comes from the vdso data and is not compile - * time constant. - */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - if (cycles > last) - return (cycles - last) * mult; - return 0; -} -#define vdso_calc_delta vdso_calc_delta - #endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -26,34 +26,21 @@ #include <asm/vdso/gettimeofday.h> #endif /* ENABLE_COMPAT_VDSO */
-#ifndef vdso_calc_delta -/* - * Default implementation which works for all sane clocksources. That - * obviously excludes x86/TSC. - */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return ((cycles - last) & mask) * mult; -} -#endif - static int do_hres(const struct vdso_data *vd, clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; - u64 cycles, last, sec, ns; + u64 delta, sec, ns; u32 seq;
do { seq = vdso_read_begin(vd); - cycles = __arch_get_hw_counter(vd->clock_mode); - ns = vdso_ts->nsec; - last = vd->cycle_last; - if (unlikely((s64)cycles < 0)) + delta = __arch_get_hw_counter(vd->clock_mode, vd->cycle_last, + vd->mask); + if (unlikely((s64)delta < 0)) return -1;
- ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); + ns = vdso_ts->nsec + delta * vd->mult; ns >>= vd->shift; sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq)));
linux-stable-mirror@lists.linaro.org