From: Paolo Abeni <pabeni@redhat.com>
commit 57fc0f1ceaa4016354cf6f88533e20b56190e41a upstream.
The MPTCP protocol accesses the listener subflow in a lockless manner in a couple of places (poll, diag). That works only if the msk itself leaves the listener status only after the subflow itself has been closed/disconnected. Otherwise we risk a deadlock in diag, as reported by Christoph.
Address the issue by ensuring that the first subflow (the listener one) is always disconnected before updating the msk socket status.
Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/407
Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mptcp/pm_netlink.c |  1 +
 net/mptcp/protocol.c   | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 12 deletions(-)
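To illustrate the invariant the patch enforces (the msk may leave TCP_LISTEN only after its first, listener, subflow has been disconnected), here is a minimal userspace model. It is a sketch only: the types, state values, and helper names are stand-ins, not kernel code; only the ordering mirrors the real sequence of mptcp_check_listen_stop() followed by inet_sk_state_store().

/* Sketch, not kernel code: models the close ordering the patch enforces. */
#include <stdatomic.h>
#include <stdio.h>

enum { TCP_LISTEN, TCP_CLOSE };		/* illustrative values only */

struct subflow { int listening; };

struct msk {
	_Atomic int state;		/* stands in for inet_sk_state_load/store */
	struct subflow *first;		/* the listener subflow */
};

/* Lockless reader, standing in for poll/diag: it only touches msk->first
 * based on the msk state it observes.
 */
static void reader(struct msk *msk)
{
	if (atomic_load(&msk->state) == TCP_LISTEN)
		printf("first subflow listening: %d\n", msk->first->listening);
}

/* Close path: disconnect the listener subflow first, and only then move the
 * msk out of TCP_LISTEN, mirroring mptcp_check_listen_stop() being called
 * before inet_sk_state_store(sk, TCP_CLOSE).
 */
static void closer(struct msk *msk)
{
	msk->first->listening = 0;		/* "mptcp_check_listen_stop()" */
	atomic_store(&msk->state, TCP_CLOSE);	/* "inet_sk_state_store()" */
}

int main(void)
{
	struct subflow sf = { .listening = 1 };
	struct msk msk = { .first = &sf };

	atomic_init(&msk.state, TCP_LISTEN);
	reader(&msk);		/* observes a live listener */
	closer(&msk);
	reader(&msk);		/* msk already left TCP_LISTEN: first not touched */
	return 0;
}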
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_soc
 	if (err)
 		return err;
 
+	inet_sk_state_store(newsk, TCP_LISTEN);
 	err = kernel_listen(ssock, backlog);
 	if (err)
 		return err;
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2385,13 +2385,6 @@ static void __mptcp_close_ssk(struct soc
 		kfree_rcu(subflow, rcu);
 	} else {
 		/* otherwise tcp will dispose of the ssk and subflow ctx */
-		if (ssk->sk_state == TCP_LISTEN) {
-			tcp_set_state(ssk, TCP_CLOSE);
-			mptcp_subflow_queue_clean(sk, ssk);
-			inet_csk_listen_stop(ssk);
-			mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
-		}
-
 		__tcp_close(ssk, 0);
 
 		/* close acquired an extra ref */
@@ -2926,10 +2919,24 @@ static __poll_t mptcp_check_readable(str
 	return EPOLLIN | EPOLLRDNORM;
 }
 
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
 {
-	if (inet_sk_state_load(sk) == TCP_LISTEN)
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	struct sock *ssk;
+
+	if (inet_sk_state_load(sk) != TCP_LISTEN)
+		return;
+
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	ssk = mptcp_sk(sk)->first;
+	if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+		return;
+
+	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+	mptcp_subflow_queue_clean(sk, ssk);
+	inet_csk_listen_stop(ssk);
+	mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+	tcp_set_state(ssk, TCP_CLOSE);
+	release_sock(ssk);
 }
 
 bool __mptcp_close(struct sock *sk, long timeout)
@@ -2942,7 +2949,7 @@ bool __mptcp_close(struct sock *sk, long
 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 
 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
-		mptcp_listen_inuse_dec(sk);
+		mptcp_check_listen_stop(sk);
 		inet_sk_state_store(sk, TCP_CLOSE);
 		goto cleanup;
 	}
@@ -3056,7 +3063,7 @@ static int mptcp_disconnect(struct sock
 	if (msk->fastopening)
 		return -EBUSY;
 
-	mptcp_listen_inuse_dec(sk);
+	mptcp_check_listen_stop(sk);
 	inet_sk_state_store(sk, TCP_CLOSE);
 
 	mptcp_stop_timer(sk);
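As context for the lock_sock_nested() call in the new helper: mptcp_check_listen_stop() runs with the msk-level socket lock already held by its callers (__mptcp_close() and mptcp_disconnect() are both reached under lock_sock(sk)), so the listener subflow lock is taken one nesting level down. A sketch of the pattern follows, under the assumption of standard kernel socket-locking semantics; it illustrates the shape of the code above and is not a standalone build:

	lock_sock(sk);					/* msk lock, held by the caller */

	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);	/* nested subflow lock; avoids
							 * lockdep complaints about the
							 * parent -> child lock order */
	/* ... unhash and stop the listener, emit the event, set TCP_CLOSE ... */
	release_sock(ssk);

	inet_sk_state_store(sk, TCP_CLOSE);		/* msk state change comes last */
	release_sock(sk);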