The lifetime management uses '__u64' timestamps on the user space interface, but 'unsigned long' for reading the current time in the kernel with get_seconds().
While this is probably safe beyond y2038, it will still overflow in 2106, and the get_seconds() call is deprecated because of that.
This changes the xfrm time handling to use time64_t consistently, along with reading the time using the safer ktime_get_real_seconds(). It still suffers from problems that can arise from a concurrent settimeofday() call or (to a lesser degree) a leap second update, but since the timestamps are part of the user API, there is nothing we can do to prevent that.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 net/xfrm/xfrm_policy.c | 24 ++++++++++++------------
 net/xfrm/xfrm_state.c  | 10 +++++-----
 2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f48251c1319..9f4afca8b4f5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -182,8 +182,8 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_policy_timer(struct timer_list *t)
 {
 	struct xfrm_policy *xp = from_timer(xp, t, timer);
-	unsigned long now = get_seconds();
-	long next = LONG_MAX;
+	time64_t now = ktime_get_real_seconds();
+	time64_t next = TIME64_MAX;
 	int warn = 0;
 	int dir;
@@ -195,7 +195,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 	dir = xfrm_policy_id2dir(xp->index);
 	if (xp->lft.hard_add_expires_seconds) {
-		long tmo = xp->lft.hard_add_expires_seconds +
+		time64_t tmo = xp->lft.hard_add_expires_seconds +
 			xp->curlft.add_time - now;
 		if (tmo <= 0)
 			goto expired;
@@ -203,7 +203,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 			next = tmo;
 	}
 	if (xp->lft.hard_use_expires_seconds) {
-		long tmo = xp->lft.hard_use_expires_seconds +
+		time64_t tmo = xp->lft.hard_use_expires_seconds +
 			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
 		if (tmo <= 0)
 			goto expired;
@@ -211,7 +211,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 			next = tmo;
 	}
 	if (xp->lft.soft_add_expires_seconds) {
-		long tmo = xp->lft.soft_add_expires_seconds +
+		time64_t tmo = xp->lft.soft_add_expires_seconds +
 			xp->curlft.add_time - now;
 		if (tmo <= 0) {
 			warn = 1;
@@ -221,7 +221,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 			next = tmo;
 	}
 	if (xp->lft.soft_use_expires_seconds) {
-		long tmo = xp->lft.soft_use_expires_seconds +
+		time64_t tmo = xp->lft.soft_use_expires_seconds +
 			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
 		if (tmo <= 0) {
 			warn = 1;
@@ -233,7 +233,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 	if (warn)
 		km_policy_expired(xp, dir, 0, 0);
-	if (next != LONG_MAX &&
+	if (next != TIME64_MAX &&
 	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
 		xfrm_pol_hold(xp);
@@ -783,7 +783,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
 	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
-	policy->curlft.add_time = get_seconds();
+	policy->curlft.add_time = ktime_get_real_seconds();
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
@@ -1268,7 +1268,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 	if (pol) {
-		pol->curlft.add_time = get_seconds();
+		pol->curlft.add_time = ktime_get_real_seconds();
 		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
 		xfrm_sk_policy_link(pol, dir);
 	}
@@ -2234,7 +2234,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 	}
 	for (i = 0; i < num_pols; i++)
-		pols[i]->curlft.use_time = get_seconds();
+		pols[i]->curlft.use_time = ktime_get_real_seconds();
 	if (num_xfrms < 0) {
 		/* Prohibit the flow */
@@ -2446,7 +2446,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 1;
 	}
-	pol->curlft.use_time = get_seconds();
+	pol->curlft.use_time = ktime_get_real_seconds();
 	pols[0] = pol;
 	npols++;
@@ -2460,7 +2460,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 			return 0;
 		}
-		pols[1]->curlft.use_time = get_seconds();
+		pols[1]->curlft.use_time = ktime_get_real_seconds();
 		npols++;
 	}
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8308281f3253..1c2b6fdd9518 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -475,8 +475,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 {
 	struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
 	struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
-	unsigned long now = get_seconds();
-	long next = LONG_MAX;
+	time64_t now = ktime_get_real_seconds();
+	time64_t next = TIME64_MAX;
 	int warn = 0;
 	int err = 0;
@@ -537,7 +537,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 	if (warn)
 		km_state_expired(x, 0, 0);
 resched:
-	if (next != LONG_MAX) {
+	if (next != TIME64_MAX) {
 		tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
 	}
@@ -577,7 +577,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 	tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
 	timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
-	x->curlft.add_time = get_seconds();
+	x->curlft.add_time = ktime_get_real_seconds();
 	x->lft.soft_byte_limit = XFRM_INF;
 	x->lft.soft_packet_limit = XFRM_INF;
 	x->lft.hard_byte_limit = XFRM_INF;
@@ -1571,7 +1571,7 @@ EXPORT_SYMBOL(xfrm_state_update);
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
 	if (!x->curlft.use_time)
-		x->curlft.use_time = get_seconds();
+		x->curlft.use_time = ktime_get_real_seconds();
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
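For readers who want to see the converted timer logic in isolation, here is a minimal userspace sketch of the pattern used in xfrm_policy_timer() above: each configured lifetime yields a remaining number of seconds, and the smallest one is tracked against a TIME64_MAX sentinel. The names in the sketch (fake_lifetime, next_timeout) are invented for illustration, ktime_get_real_seconds() is approximated with time(NULL), and this is not kernel code.

#include <stdio.h>
#include <stdint.h>
#include <time.h>

typedef int64_t time64_t;
#define TIME64_MAX INT64_MAX

/* Hypothetical stand-in for the add-time based lifetime fields. */
struct fake_lifetime {
	time64_t add_time;                 /* when the entry was created */
	time64_t hard_add_expires_seconds; /* 0 means "no limit" */
	time64_t soft_add_expires_seconds; /* 0 means "no limit" */
};

/* Seconds until the next expiry event, or TIME64_MAX if no limit is set. */
static time64_t next_timeout(const struct fake_lifetime *lft, time64_t now)
{
	time64_t next = TIME64_MAX;

	if (lft->hard_add_expires_seconds) {
		time64_t tmo = lft->hard_add_expires_seconds + lft->add_time - now;

		if (tmo < next)
			next = tmo;
	}
	if (lft->soft_add_expires_seconds) {
		time64_t tmo = lft->soft_add_expires_seconds + lft->add_time - now;

		if (tmo < next)
			next = tmo;
	}
	return next;
}

int main(void)
{
	time64_t now = (time64_t)time(NULL); /* stand-in for ktime_get_real_seconds() */
	struct fake_lifetime lft = {
		.add_time = now - 30,            /* created 30 seconds ago */
		.hard_add_expires_seconds = 3600,
		.soft_add_expires_seconds = 300,
	};

	printf("next timer in %lld seconds\n", (long long)next_timeout(&lft, now));
	return 0;
}

In the real function a non-positive remaining time means the policy has expired (or triggers a soft-expiry warning); the sketch only shows why a 64-bit type and a 64-bit sentinel are needed once the timestamps themselves are 64-bit.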
get_seconds() is deprecated because it can overflow on 32-bit architectures. For the xfrm_state->lastused member, we treat the data as a 64-bit number already, so we just need to use the right accessor that works on both 32-bit and 64-bit machines.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/net/xfrm.h       | 2 +-
 net/ipv6/xfrm6_mode_ro.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..d9031415402f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -225,7 +225,7 @@ struct xfrm_state {
 	long		saved_tmo;
 	/* Last used time */
-	unsigned long		lastused;
+	time64_t		lastused;
struct page_frag xfrag;
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 07d36573f50b..da28e4407b8f 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -55,7 +55,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, hdr_len);
 	memmove(ipv6_hdr(skb), iph, hdr_len);
-	x->lastused = get_seconds();
+	x->lastused = ktime_get_real_seconds();
 	return 0;
 }
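As a quick illustration of why the field width and accessor matter (not part of the patch; the numbers are made up), a seconds value past year 2106 no longer fits in a 32-bit 'unsigned long', while a fixed 64-bit type keeps it intact:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t seconds_in_2107 = 4323456789LL;          /* larger than UINT32_MAX */
	uint32_t truncated = (uint32_t)seconds_in_2107;  /* what a 32-bit 'unsigned long' would keep */

	printf("64-bit value:          %lld\n", (long long)seconds_in_2107);
	printf("truncated to 32 bits:  %u\n", truncated); /* wraps around */
	return 0;
}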
Using get_seconds() for timestamps is deprecated since it can lead to overflows on 32-bit systems. While the interface generally doesn't overflow until year 2106, the specific implementation of the TCP PAWS algorithm breaks in 2038 when the intermediate signed 32-bit timestamps overflow.
A related problem is that keeping the local timestamps in CLOCK_REALTIME leads to unexpected behavior when settimeofday() is called to set the system clock backwards or forwards by more than 24 days.
While the first problem could be solved by using an overflow-safe method of comparing the timestamps, a nicer solution is to use a monotonic clocksource with ktime_get_seconds() that simply doesn't overflow (at least not until 136 years after boot) and that doesn't change during settimeofday().
To make 32-bit and 64-bit architectures behave the same way here, and also save a few bytes in the tcp_options_received structure, I'm changing the type to a 32-bit integer, which is now safe on all architectures.
Finally, the ts_recent_stamp field also (confusingly) gets used to store a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow(). This is currently safe, but changing the type to 32-bit requires some small changes there to keep it working.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/crypto/chelsio/chtls/chtls_cm.c |  2 +-
 include/linux/tcp.h                     |  4 ++--
 include/net/tcp.h                       | 15 ++++++++-------
 net/ipv4/tcp_input.c                    |  2 +-
 net/ipv4/tcp_ipv4.c                     |  2 +-
 net/ipv4/tcp_minisocks.c                |  8 ++++----
 6 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 2bb6f0380758..0997e166ea57 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	tp->rcv_nxt++;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 	tp->srtt_us = 0;
 	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 }
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..f911b9b09b16 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -89,7 +89,7 @@ struct tcp_sack_block {
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
-	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 	u32	ts_recent;	/* Time stamp to echo next */
 	u32	rcv_tsval;	/* Time stamp value */
 	u32	rcv_tsecr;	/* Time stamp echo reply */
@@ -425,7 +425,7 @@ struct tcp_timewait_sock {
 	/* The time we sent the last out-of-window ACK: */
 	u32			  tw_last_oow_ack_time;
-	long			  tw_ts_recent_stamp;
+	int			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0448e7c5d2b4..f8c32dc36ea1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -471,19 +471,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-	unsigned long now = jiffies;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
-	if (time_after(now, last_overflow + HZ))
+	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
-	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 static inline u32 tcp_cookie_time(void)
@@ -1361,7 +1362,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
 	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
 		return true;
-	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+	if (unlikely(ktime_get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
 		return true;
 	/*
 	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1391,7 +1392,7 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && ktime_get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
 		return false;
 	return true;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..0eb314774aec 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3449,7 +3449,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1e8302..41d03153c5bf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -155,7 +155,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	   and use initial timestamp retrieved from peer table.
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
-	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+	    (!twp || (reuse && ktime_get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1dda1341a223..1f652beb79ca 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
@@ -189,7 +189,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 		}
 		inet_twsk_put(tw);
@@ -534,7 +534,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 	if (newtp->rx_opt.tstamp_ok) {
 		newtp->rx_opt.ts_recent = req->ts_recent;
-		newtp->rx_opt.ts_recent_stamp = get_seconds();
+		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 	} else {
 		newtp->rx_opt.ts_recent_stamp = 0;
@@ -600,7 +600,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+			tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
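To make the changelog's point about 2038 concrete, here is a small userspace demonstration (not kernel code, and the exact failure mode of the old PAWS check also depends on integer promotions): once the wall-clock second count passes INT32_MAX in January 2038, it no longer fits into a signed 32-bit field such as the old 'long ts_recent_stamp' on a 32-bit architecture, while a monotonic stamp that starts near zero at boot stays far below that limit for decades.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t wallclock_2038 = 2147483648LL;    /* one second past INT32_MAX */
	int32_t stored = (int32_t)wallclock_2038; /* wraps negative on typical two's-complement systems */

	printf("wall-clock seconds: %lld, stored in a signed 32-bit field: %d\n",
	       (long long)wallclock_2038, stored);

	/* A monotonic stamp (seconds since boot) stays small for ~68 years. */
	int32_t monotonic_stamp = 30 * 24 * 3600; /* e.g. 30 days of uptime */
	printf("monotonic stamp fits comfortably: %d\n", monotonic_stamp);
	return 0;
}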
On 06/18/2018 08:22 AM, Arnd Bergmann wrote:
> Using get_seconds() for timestamps is deprecated since it can lead to overflows on 32-bit systems. While the interface generally doesn't overflow until year 2106, the specific implementation of the TCP PAWS algorithm breaks in 2038 when the intermediate signed 32-bit timestamps overflow.
> ...
>  static inline u32 tcp_cookie_time(void)
> @@ -1361,7 +1362,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
>  {
>  	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
>  		return true;
> -	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
> +	if (unlikely(ktime_get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
>  		return true;
>  	/*
>  	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
> @@ -1391,7 +1392,7 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
>  	   However, we can relax time bounds for RST segments to MSL.
>  	 */
> -	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
> +	if (rst && ktime_get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
>  		return false;
>  	return true;
Please use the time_after32(), since ktime_get_seconds() is time64_t while ts_recent_stamp is int.
Same remark for tcp_twsk_unique()
Let's clean up this stuff, thanks!
On Mon, Jun 18, 2018 at 5:59 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On 06/18/2018 08:22 AM, Arnd Bergmann wrote:
> Please use the time_after32(), since ktime_get_seconds() is time64_t while ts_recent_stamp is int.
> Same remark for tcp_twsk_unique()
> Let's clean up this stuff, thanks!
Sure, no problem. Note that since ktime_get_seconds() uses CLOCK_MONOTONIC, it is guaranteed to return a value that fits into a positive 32-bit number (for about 136 years after boot, to be precise). Most uses of time_after() or time_after32() are for jiffies values that do overflow regularly, but it doesn't hurt to use time_after32() here for illustration.
Arnd
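To illustrate the comparison being requested in this exchange, here is a userspace sketch of a wrap-safe 32-bit check. The macro below mirrors the semantics of the kernel's time_after32() (ordering two u32 counters modulo 2^32), although it is written from memory and may not match the kernel definition character for character:

#include <stdio.h>
#include <stdint.h>

#define time_after32(a, b) ((int32_t)((uint32_t)(b) - (uint32_t)(a)) < 0)

int main(void)
{
	uint32_t stamp = 0xFFFFFFF0u; /* taken just before the 32-bit counter wraps */
	uint32_t now   = 0x00000010u; /* 32 ticks later, after the wrap */

	/* A plain '>' gets the ordering wrong across the wrap point... */
	printf("naive:        now > stamp ?      %d\n", now > stamp);
	/* ...while the modular comparison still reports that 'now' is later. */
	printf("time_after32: now after stamp ?  %d\n", time_after32(now, stamp));
	return 0;
}

The same modular-arithmetic idea is what keeps the jiffies use of ts_recent_stamp in tcp_synq_overflow() working after the field is narrowed to 32 bits.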