On Fri, Apr 18 2025 at 08:37, Thomas Gleixner wrote:
> On Thu, Apr 17 2025 at 17:46, John Stultz wrote:
>> Instead it seems like we should just do:
>>
>>	tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
>
> You end up with the same problem again because xtime_nsec can move
> backwards when the multiplier is updated, no?
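
To make that concrete, here is a toy user-space illustration (made-up
numbers, not kernel code): the full readout (xtime_nsec + offset * mult)
>> shift stays continuous across a multiplier adjustment in the style of
timekeeping_apply_adjustment(), but xtime_nsec alone, which the coarse
getters read today, jumps backwards:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t xtime_nsec = 1000ULL << 8;	/* shift = 8 */
	uint64_t offset = 5000;			/* unaccumulated cycles */
	uint32_t mult = 256, mult_adj = 16;
	uint64_t before, after;

	before = (xtime_nsec + offset * mult) >> 8;

	/*
	 * Mimics timekeeping_apply_adjustment(): grow mult and pull
	 * xtime_nsec back so that the full readout stays continuous.
	 */
	xtime_nsec -= offset * mult_adj;
	mult += mult_adj;

	after = (xtime_nsec + offset * mult) >> 8;

	printf("full readout:        %llu -> %llu ns (continuous)\n",
	       (unsigned long long)before, (unsigned long long)after);
	printf("xtime_nsec >> shift: 1000 -> %llu ns (went backwards)\n",
	       (unsigned long long)(xtime_nsec >> 8));
	return 0;
}

With these numbers the full readout stays at 6000 ns, while
xtime_nsec >> shift drops from 1000 to 687 ns.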
Something like the below should work.
Thanks,
        tglx
---
 include/linux/timekeeper_internal.h |   10 ++++-
 kernel/time/timekeeping.c           |   62 ++++++++++++++++++++++++++++++++----
 kernel/time/vsyscall.c              |    4 +-
 3 files changed, 65 insertions(+), 11 deletions(-)

--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -51,7 +51,7 @@ struct tk_read_base {
  * @offs_real:		Offset clock monotonic -> clock realtime
  * @offs_boot:		Offset clock monotonic -> clock boottime
  * @offs_tai:		Offset clock monotonic -> clock tai
- * @tai_offset:		The current UTC to TAI offset in seconds
+ * @coarse_nsec:	The nanoseconds part for coarse time getters
  * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
  * @raw_sec:		CLOCK_MONOTONIC_RAW time in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
@@ -76,6 +76,8 @@ struct tk_read_base {
  *			ntp shifted nano seconds.
  * @ntp_err_mult:	Multiplication factor for scaled math conversion
  * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
+ * @tai_offset:		The current UTC to TAI offset in seconds
+ * @coarse_offset:	The offset of the coarse timekeeper in clock cycles
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -100,7 +102,7 @@ struct tk_read_base {
  * which results in the following cacheline layout:
  *
  * 0:	seqcount, tkr_mono
- * 1:	xtime_sec ... tai_offset
+ * 1:	xtime_sec ... coarse_nsec
  * 2:	tkr_raw, raw_sec
  * 3,4: Internal variables
  *
@@ -121,7 +123,7 @@ struct timekeeper {
 	ktime_t			offs_real;
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
-	s32			tai_offset;
+	u32			coarse_nsec;
 
 	/* Cacheline 2: */
 	struct tk_read_base	tkr_raw;
@@ -144,6 +146,8 @@ struct timekeeper {
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
 	u32			skip_second_overflow;
+	s32			tai_offset;
+	u32			coarse_offset;
 };
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -164,6 +164,15 @@ static inline struct timespec64 tk_xtime
 	return ts;
 }
 
+static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
+{
+	struct timespec64 ts;
+
+	ts.tv_sec = tk->xtime_sec;
+	ts.tv_nsec = tk->coarse_nsec;
+	return ts;
+}
+
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
 	tk->xtime_sec = ts->tv_sec;
@@ -252,6 +261,7 @@ static void tk_setup_internals(struct ti
 	tk->tkr_raw.clock = clock;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
+	tk->coarse_offset = 0;
 
 	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = NTP_INTERVAL_LENGTH;
@@ -636,6 +646,34 @@ static void timekeeping_restore_shadow(s
 	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
 }
 
+/*
+ * Update the nanoseconds part for the coarse time keepers. They can't rely
+ * on xtime_nsec because xtime_nsec is adjusted when the multiplication
+ * factor of the clock is adjusted. See timekeeping_apply_adjustment().
+ *
+ * This is required because tk_read::cycle_last must be advanced by
+ * timekeeper::cycle_interval so that the accumulation happens with a
+ * periodic reference.
+ *
+ * But that adjustment of xtime_nsec can make it go backward to compensate
+ * for a larger multiplicator.
+ *
+ * timekeeper::offset contains the leftover cycles which were not accumulated.
+ * Therefore the nanoseconds portion of the time when the clocksource was
+ * read in timekeeping_advance() is:
+ *
+ *	nsec = (xtime_nsec + offset * mult) >> shift;
+ *
+ * Calculate that value and store it in timekeeper::coarse_nsec, from where
+ * the coarse time getters consume it.
+ */
+static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
+{
+	u64 offset = (u64)tk->coarse_offset * tk->tkr_mono.mult;
+
+	tk->coarse_nsec = (tk->tkr_mono.xtime_nsec + offset) >> tk->tkr_mono.shift;
+}
+
 static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
 {
 	struct timekeeper *tk = &tk_core.shadow_timekeeper;
@@ -658,6 +696,7 @@ static void timekeeping_update_from_shad
 
 	tk_update_leap_state(tk);
 	tk_update_ktime_data(tk);
+	tk_update_coarse_nsecs(tk);
 
 	update_vsyscall(tk);
 	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
@@ -708,6 +747,12 @@ static void timekeeping_forward_now(stru
 		tk_normalize_xtime(tk);
 		delta -= incr;
 	}
+
+	/*
+	 * Clear the offset for the coarse time as the above forward
+	 * brought the offset down to zero.
+	 */
+	tk->coarse_offset = 0;
 }
 
 /**
@@ -804,8 +849,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset)
 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned int seq;
 	ktime_t base, *offset = offsets[offs];
+	unsigned int seq;
 	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
@@ -813,7 +858,7 @@ ktime_t ktime_get_coarse_with_offset(enu
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		base = ktime_add(tk->tkr_mono.base, *offset);
-		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+		nsecs = tk->coarse_nsec;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1831,6 +1876,8 @@ void timekeeping_resume(void)
 	/* Re-base the last cycle value */
 	tks->tkr_mono.cycle_last = cycle_now;
 	tks->tkr_raw.cycle_last = cycle_now;
+	/* Reset the offset for the coarse time getters */
+	tks->coarse_offset = 0;
 
 	tks->ntp_error = 0;
 	timekeeping_suspended = 0;
@@ -2205,6 +2252,9 @@ static bool timekeeping_advance(enum tim
 	 */
 	clock_set |= accumulate_nsecs_to_secs(tk);
 
+	/* Compensate the coarse time getters xtime_nsec offset */
+	tk->coarse_offset = (u32)offset;
+
 	timekeeping_update_from_shadow(&tk_core, clock_set);
 
 	return !!clock_set;
@@ -2248,7 +2298,7 @@ void ktime_get_coarse_real_ts64(struct t
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts = tk_xtime(tk);
+		*ts = tk_xtime_coarse(tk);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 }
 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
@@ -2271,7 +2321,7 @@ void ktime_get_coarse_real_ts64_mg(struc
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		*ts = tk_xtime(tk);
+		*ts = tk_xtime_coarse(tk);
 		offset = tk_core.timekeeper.offs_real;
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -2350,12 +2400,12 @@ void ktime_get_coarse_ts64(struct timesp
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		now = tk_xtime(tk);
+		now = tk_xtime_coarse(tk);
 		mono = tk->wall_to_monotonic;
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
 	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
-				now.tv_nsec + mono.tv_nsec);
+				  now.tv_nsec + mono.tv_nsec);
 }
 EXPORT_SYMBOL(ktime_get_coarse_ts64);

--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *
 	/* CLOCK_REALTIME_COARSE */
 	vdso_ts		= &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
 	vdso_ts->sec	= tk->xtime_sec;
-	vdso_ts->nsec	= tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+	vdso_ts->nsec	= tk->coarse_nsec;
 
 	/* CLOCK_MONOTONIC_COARSE */
 	vdso_ts		= &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
 	vdso_ts->sec	= tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	nsec		= tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+	nsec		= tk->coarse_nsec;
 	nsec		= nsec + tk->wall_to_monotonic.tv_nsec;
 	vdso_ts->sec	+= __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
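
For completeness, the CLOCK_MONOTONIC_COARSE normalization at the end of
update_vsyscall() can be checked in isolation. Below is a user-space
stand-in (the helper iter_div_u64_rem here is a hypothetical replica of
the kernel's __iter_div_u64_rem(), assumed semantics: iterative division,
cheap when the quotient is 0 or 1) showing how the excess whole second
carries into the seconds part:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Stand-in for __iter_div_u64_rem(): divide by repeated subtraction,
 * return the quotient and store the remainder. */
static uint32_t iter_div_u64_rem(uint64_t dividend, uint64_t divisor,
				 uint64_t *remainder)
{
	uint32_t ret = 0;

	while (dividend >= divisor) {
		dividend -= divisor;
		ret++;
	}
	*remainder = dividend;
	return ret;
}

int main(void)
{
	uint64_t sec = 1000, nsec;

	/* coarse_nsec + wall_to_monotonic.tv_nsec can exceed one second */
	nsec = 900000000ULL + 400000000ULL;
	sec += iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);

	printf("sec=%llu nsec=%llu\n",
	       (unsigned long long)sec, (unsigned long long)nsec);
	return 0;
}

This prints sec=1001 nsec=300000000, i.e. the 1.3s nanosecond sum is
normalized to 0.3s with one second carried into vdso_ts->sec.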