5.15-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Dumazet edumazet@google.com
[ Upstream commit e13ec3da05d130f0d10da8e1fbe1be26dcdb0e27 ]
tcp_poll() reads sk->sk_err without socket lock held/owned.
We should used READ_ONCE() here, and update writers to use WRITE_ONCE().
Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Stable-dep-of: 853c3bd7b791 ("tcp: fix race in tcp_write_err()") Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/tcp.c | 11 ++++++----- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 6 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1602b36dee42..db3bfe1a8443c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -594,7 +594,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR;
return mask; @@ -2995,7 +2996,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { - sk->sk_err = ECONNABORTED; + WRITE_ONCE(sk->sk_err, ECONNABORTED); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -3003,9 +3004,9 @@ int tcp_disconnect(struct sock *sk, int flags) * states */ tcp_send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET);
tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); @@ -4513,7 +4514,7 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d3273d6dce7e9..bb20ae8dba09b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4362,15 +4362,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 901c873fbaf5a..a1ed81ff9a81d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -593,7 +593,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
@@ -622,7 +622,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { /* Only an error on timeout */ WRITE_ONCE(sk->sk_err_soft, err); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0fb84e57a2d49..2c9670c832020 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3724,7 +3724,7 @@ static void tcp_connect_init(struct sock *sk) tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd;
- sk->sk_err = 0; + WRITE_ONCE(sk->sk_err, 0); sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1a26130807f73..ed01b775b8947 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk) { - sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; + WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); sk_error_report(sk);
tcp_write_queue_purge(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index afdaa2e3cb6ef..c9aee34ae469f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -486,7 +486,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk); @@ -506,7 +506,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, }
if (!sock_owned_by_user(sk) && np->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err);