This small series have various unrelated patches:
- Patch 1 and 2: improve code coverage by validating mptcp_diag_dump_one thanks to a new tool displaying MPTCP info for a specific token.
- Patch 3: a fix for a commit which is only in net-next.
- Patch 4: reduce parameters for one in-kernel PM helper.
- Patch 5: exit early when processing an ADD_ADDR echo to avoid unneeded operations.
Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- Gang Yan (2): selftests: mptcp: Add a tool to get specific msk_info selftests: mptcp: add a test for mptcp_diag_dump_one
Geliang Tang (2): mptcp: pm: in-kernel: avoid access entry without lock mptcp: pm: in-kernel: reduce parameters of set_flags
Matthieu Baerts (NGI0) (1): mptcp: pm: exit early with ADD_ADDR echo if possible
net/mptcp/pm.c | 3 + net/mptcp/pm_netlink.c | 15 +- tools/testing/selftests/net/mptcp/Makefile | 2 +- tools/testing/selftests/net/mptcp/diag.sh | 27 +++ tools/testing/selftests/net/mptcp/mptcp_diag.c | 272 +++++++++++++++++++++++++ 5 files changed, 311 insertions(+), 8 deletions(-) --- base-commit: 56794b5862c5a9aefcf2b703257c6fb93f76573e change-id: 20250228-net-next-mptcp-coverage-small-opti-70d8dc1d329d
Best regards,
From: Gang Yan yangang@kylinos.cn
This patch enables the retrieval of the mptcp_info structure corresponding to a specified MPTCP socket (msk). When multiple MPTCP connections are present, specific information can be obtained for a given connection through the 'mptcp_diag_dump_one' by using the 'token' associated with the msk.
Signed-off-by: Gang Yan yangang@kylinos.cn Co-developed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/net/mptcp/Makefile | 2 +- tools/testing/selftests/net/mptcp/mptcp_diag.c | 272 +++++++++++++++++++++++++ 2 files changed, 273 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index c76525fe2b84d503646429d1ddfb4a323a722ae9..340e1a777e16a893300e01d19fdb5191fab8e00b 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -7,7 +7,7 @@ CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INC TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
TEST_FILES := mptcp_lib.sh settings
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c new file mode 100644 index 0000000000000000000000000000000000000000..284286c524cfeff5f49b0af1a4da5a376c9e3140 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include <linux/sock_diag.h> +#include <linux/rtnetlink.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <linux/tcp.h> + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; +}; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_diag -t\n"); + exit(r); +} + +static void send_query(int fd, __u32 token) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct { + struct nlmsghdr nlh; + struct inet_diag_req_v2 r; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .id.idiag_cookie[0] = token, + } + }; + struct rtattr rta_proto; + struct iovec iov[6]; + int iovlen = 1; + __u32 proto; + + req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1)); + proto = IPPROTO_MPTCP; + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[0] = (struct iovec) { + .iov_base = &req, + .iov_len = sizeof(req) + }; + iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + iovlen += 2; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + die_perror("sendmsg"); + } + break; + } +} + +static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if (type <= max && !tb[type]) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } +} + +static void print_info_msg(struct mptcp_info *info) +{ + printf("Token & Flags\n"); + printf("token: %x\n", info->mptcpi_token); + printf("flags: %x\n", info->mptcpi_flags); + printf("csum_enabled: %u\n", info->mptcpi_csum_enabled); + + printf("\nBasic Info\n"); + printf("subflows: %u\n", info->mptcpi_subflows); + printf("subflows_max: %u\n", info->mptcpi_subflows_max); + printf("subflows_total: %u\n", info->mptcpi_subflows_total); + printf("local_addr_used: %u\n", info->mptcpi_local_addr_used); + printf("local_addr_max: %u\n", info->mptcpi_local_addr_max); + printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal); + printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted); + printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max); + printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max); + + printf("\nTransmission Info\n"); + printf("write_seq: %llu\n", info->mptcpi_write_seq); + printf("snd_una: %llu\n", info->mptcpi_snd_una); + printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt); + printf("last_data_sent: %u\n", info->mptcpi_last_data_sent); + printf("last_data_recv: %u\n", info->mptcpi_last_data_recv); + printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv); + printf("retransmits: %u\n", info->mptcpi_retransmits); + printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans); + printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent); + printf("bytes_received: %llu\n", info->mptcpi_bytes_received); + printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); +} + +static void parse_nlmsg(struct nlmsghdr *nlh) +{ + struct inet_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[INET_DIAG_MAX + 1]; + + parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), + NLA_F_NESTED); + + if (tb[INET_DIAG_INFO]) { + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + struct mptcp_info *info; + + /* workaround fort older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else { + info = RTA_DATA(tb[INET_DIAG_INFO]); + } + print_info_msg(info); + } +} + +static void recv_nlmsg(int fd, struct nlmsghdr *nlh) +{ + char rcv_buff[8192]; + struct sockaddr_nl rcv_nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec rcv_iov = { + .iov_base = rcv_buff, + .iov_len = sizeof(rcv_buff) + }; + struct msghdr rcv_msg = { + .msg_name = &rcv_nladdr, + .msg_namelen = sizeof(rcv_nladdr), + .msg_iov = &rcv_iov, + .msg_iovlen = 1 + }; + int len; + + len = recvmsg(fd, &rcv_msg, 0); + nlh = (struct nlmsghdr *)rcv_buff; + + while (NLMSG_OK(nlh, len)) { + if (nlh->nlmsg_type == NLMSG_DONE) { + printf("NLMSG_DONE\n"); + break; + } else if (nlh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + printf("Error %d:%s\n", + -(err->error), strerror(-(err->error))); + break; + } + parse_nlmsg(nlh); + nlh = NLMSG_NEXT(nlh, len); + } +} + +static void get_mptcpinfo(__u32 token) +{ + struct nlmsghdr *nlh = NULL; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + send_query(fd, token); + recv_nlmsg(fd, nlh); + + close(fd); +} + +static void parse_opts(int argc, char **argv, __u32 *target_token) +{ + int c; + + if (argc < 2) + die_usage(1); + + while ((c = getopt(argc, argv, "ht:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case 't': + sscanf(optarg, "%x", target_token); + break; + default: + die_usage(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + __u32 target_token; + + parse_opts(argc, argv, &target_token); + get_mptcpinfo(target_token); + + return 0; +} +
From: Gang Yan yangang@kylinos.cn
This patch introduces a new 'chk_diag' test in diag.sh. It retrieves the token for a specified MPTCP socket (msk) using the 'ss' command and then accesses the 'mptcp_diag_dump_one' in kernel via ./mptcp_diag to verify if the correct token is returned.
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/524 Signed-off-by: Gang Yan yangang@kylinos.cn Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- tools/testing/selftests/net/mptcp/diag.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+)
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2bd0c1eb70c5b69b4ab364eec3f4017898ff7f03..4f55477ffe087721ad13774e82a5e2b1e6cec7c4 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -200,6 +200,32 @@ chk_msk_cestab() "${expected}" "${msg}" "" }
+chk_dump_one() +{ + local ss_token + local token + local msg + + ss_token="$(ss -inmHMN $ns | grep 'token:' |\ + head -n 1 |\ + sed 's/.*token:([0-9a-f]*).*/\1/')" + + token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ + awk -F':[ \t]+' '/^token/ {print $2}')" + + msg="....chk dump_one" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + msk_info_get_value() { local port="${1}" @@ -290,6 +316,7 @@ chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 +chk_dump_one flush_pids
chk_msk_inuse 0 "2->0"
From: Geliang Tang tanggeliang@kylinos.cn
In mptcp_pm_nl_set_flags(), "entry" is copied to "local" when pernet->lock is held to avoid direct access to entry without pernet->lock.
Therefore, "local->flags" should be passed to mptcp_nl_set_flags instead of "entry->flags" when pernet->lock is not held, so as to avoid access to entry.
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Fixes: 145dc6cc4abd ("mptcp: pm: change to fullmesh only for 'subflow'") Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- net/mptcp/pm_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d4328443d844dbe52d2b0cc0018dcac1e4245320..fb83eba041f17c09923a8ed1a033983692962c5a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1983,7 +1983,7 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, *local = *entry; spin_unlock_bh(&pernet->lock);
- mptcp_nl_set_flags(net, &local->addr, entry->flags, changed); + mptcp_nl_set_flags(net, &local->addr, local->flags, changed); return 0; }
From: Geliang Tang tanggeliang@kylinos.cn
The number of parameters in mptcp_nl_set_flags() can be reduced. Only need to pass a "local" parameter to it instead of "local->addr" and "local->flags".
Signed-off-by: Geliang Tang tanggeliang@kylinos.cn Reviewed-by: Matthieu Baerts (NGI0) matttbe@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- net/mptcp/pm_netlink.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index fb83eba041f17c09923a8ed1a033983692962c5a..4bebc4963c42d481f041999ef8976a58889919d5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1907,11 +1907,12 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); }
-static void mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr, - u8 flags, u8 changed) +static void mptcp_nl_set_flags(struct net *net, + struct mptcp_pm_addr_entry *local, + u8 changed) { - u8 is_subflow = !!(flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); - u8 bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); + u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); long s_slot = 0, s_num = 0; struct mptcp_sock *msk;
@@ -1926,10 +1927,10 @@ static void mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr,
lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); + mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, NULL, bkup); /* Subflows will only be recreated if the SUBFLOW flag is set */ if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) - mptcp_pm_nl_fullmesh(msk, addr); + mptcp_pm_nl_fullmesh(msk, &local->addr); release_sock(sk);
next: @@ -1983,7 +1984,7 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, *local = *entry; spin_unlock_bh(&pernet->lock);
- mptcp_nl_set_flags(net, &local->addr, local->flags, changed); + mptcp_nl_set_flags(net, local, changed); return 0; }
When the userspace PM is used, or when the in-kernel limits are reached, there will be no need to schedule the PM worker to signal new addresses. That corresponds to pm->work_pending set to 0.
In this case, an early exit can be done in mptcp_pm_add_addr_echoed() not to hold the PM lock, and iterate over the announced addresses list, not to schedule the worker anyway in this case. This is similar to what is done when a connection or a subflow has been established.
Reviewed-by: Geliang Tang geliang@kernel.org Signed-off-by: Matthieu Baerts (NGI0) matttbe@kernel.org --- net/mptcp/pm.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 16cacce6c10fe86467aa7ef8e588f9f535b586fb..6c8cadf84f31f4c7dcc38b787beda048d5362dc8 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -251,6 +251,9 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
pr_debug("msk=%p\n", msk);
+ if (!READ_ONCE(pm->work_pending)) + return; + spin_lock_bh(&pm->lock);
if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
linux-kselftest-mirror@lists.linaro.org