As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app can create sockets with IPPROTO_MPTCP as the proto: ( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be forced to create and use MPTCP sockets instead of TCP ones via the mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations [2]:
- it doesn't work if the application is not using libc (e.g. GoLang apps) - in some envs, it might not be easy to set env vars / change the way apps are launched, e.g. on Android - mptcpize needs to be launched with all apps that want MPTCP: we could have more control from BPF to enable MPTCP only for some apps or all the ones of a netns or a cgroup, etc. - it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similer to mptcpize.
The main idea is add a hook in sys_socket() to change the protocol id from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1] https://github.com/multipath-tcp/mptcp_net-next/wiki [2] https://github.com/multipath-tcp/mptcp_net-next/issues/79
v5: - add bpf_mptcpify helper.
v4: - use lsm_cgroup/socket_create
v3: - patch 8: char cmd[128]; -> char cmd[256];
v2: - Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79 Signed-off-by: Geliang Tang geliang.tang@suse.com --- include/linux/bpf.h | 1 + include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 6 +- include/uapi/linux/bpf.h | 7 + kernel/bpf/bpf_lsm.c | 2 + net/mptcp/bpf.c | 20 +++ net/socket.c | 4 +- security/apparmor/lsm.c | 8 +- security/security.c | 2 +- security/selinux/hooks.c | 6 +- tools/include/uapi/linux/bpf.h | 7 + .../testing/selftests/bpf/prog_tests/mptcp.c | 128 ++++++++++++++++-- tools/testing/selftests/bpf/progs/mptcpify.c | 17 +++ 13 files changed, 187 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 360433f14496..77cdbe21e5c1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2913,6 +2913,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; +extern const struct bpf_func_proto bpf_mptcpify_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 7308a1a7599b..d475e6ab20ca 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -288,7 +288,7 @@ LSM_HOOK(int, 0, watch_key, struct key *key) LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, struct sock *newsk) LSM_HOOK(int, 0, unix_may_send, struct socket *sock, struct socket *other) -LSM_HOOK(int, 0, socket_create, int family, int type, int protocol, int kern) +LSM_HOOK(int, 0, socket_create, int *family, int *type, int *protocol, int kern) LSM_HOOK(int, 0, socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) LSM_HOOK(int, 0, socket_socketpair, struct socket *socka, struct socket *sockb) diff --git a/include/linux/security.h b/include/linux/security.h index 32828502f09e..a683c10d4071 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1416,7 +1416,7 @@ static inline int security_watch_key(struct key *key)
int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); int security_unix_may_send(struct socket *sock, struct socket *other); -int security_socket_create(int family, int type, int protocol, int kern); +int security_socket_create(int *family, int *type, int *protocol, int kern); int security_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern); int security_socket_socketpair(struct socket *socka, struct socket *sockb); @@ -1481,8 +1481,8 @@ static inline int security_unix_may_send(struct socket *sock, return 0; }
-static inline int security_socket_create(int family, int type, - int protocol, int kern) +static inline int security_socket_create(int *family, int *type, + int *protocol, int kern) { return 0; } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 600d0caebbd8..265077883f18 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5583,6 +5583,12 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * int bpf_mptcpify(int *family, int *type, int *protocol) + * Description + * Dynamically mptcpify a TCP socket as an MPTCP one. + * Return + * 0 on success. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5797,6 +5803,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(mptcpify, 212, ##ctx) \ /* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index e14c822f8911..8cfa6a44f8f2 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -248,6 +248,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) prog->aux->attach_btf_id)) return &bpf_unlocked_sk_getsockopt_proto; return NULL; + case BPF_FUNC_mptcpify: + return &bpf_mptcpify_proto; #endif default: return tracing_prog_func_proto(func_id, prog); diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 5a0a84ad94af..7207f60dad76 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -19,3 +19,23 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
return NULL; } + +BPF_CALL_3(bpf_mptcpify, int *, family, int *, type, int *, protocol) +{ + if ((*family == AF_INET || *family == AF_INET6) && + *type == SOCK_STREAM && + (!*protocol || *protocol == IPPROTO_TCP)) { + *protocol = IPPROTO_MPTCP; + } + + return 0; +} + +const struct bpf_func_proto bpf_mptcpify_proto = { + .func = bpf_mptcpify, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; diff --git a/net/socket.c b/net/socket.c index 2b0e54b2405c..3957e2a865ef 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1328,7 +1328,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) int err; struct socket *sock = NULL;
- err = security_socket_create(family, type, protocol, 1); + err = security_socket_create(&family, &type, &protocol, 1); if (err) goto out;
@@ -1488,7 +1488,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, family = PF_PACKET; }
- err = security_socket_create(family, type, protocol, kern); + err = security_socket_create(&family, &type, &protocol, kern); if (err) return err;
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index c9463bd0307d..ae823b95d304 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -868,7 +868,7 @@ static void apparmor_sk_clone_security(const struct sock *sk, /** * apparmor_socket_create - check perms before creating a new socket */ -static int apparmor_socket_create(int family, int type, int protocol, int kern) +static int apparmor_socket_create(int *family, int *type, int *protocol, int kern) { struct aa_label *label; int error = 0; @@ -877,10 +877,10 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern)
label = begin_current_label_crit_section(); if (!(kern || unconfined(label))) - error = af_select(family, - create_perm(label, family, type, protocol), + error = af_select(*family, + create_perm(label, *family, *type, *protocol), aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, - family, type, protocol)); + *family, *type, *protocol)); end_current_label_crit_section(label);
return error; diff --git a/security/security.c b/security/security.c index b720424ca37d..bbebcddce420 100644 --- a/security/security.c +++ b/security/security.c @@ -4078,7 +4078,7 @@ EXPORT_SYMBOL(security_unix_may_send); * * Return: Returns 0 if permission is granted. */ -int security_socket_create(int family, int type, int protocol, int kern) +int security_socket_create(int *family, int *type, int *protocol, int kern) { return call_int_hook(socket_create, 0, family, type, protocol, kern); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d06e350fedee..8c2303f052f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4512,8 +4512,8 @@ static int sock_has_perm(struct sock *sk, u32 perms) &ad); }
-static int selinux_socket_create(int family, int type, - int protocol, int kern) +static int selinux_socket_create(int *family, int *type, + int *protocol, int kern) { const struct task_security_struct *tsec = selinux_cred(current_cred()); u32 newsid; @@ -4523,7 +4523,7 @@ static int selinux_socket_create(int family, int type, if (kern) return 0;
- secclass = socket_type_to_security_class(family, type, protocol); + secclass = socket_type_to_security_class(*family, *type, *protocol); rc = socket_sockcreate_sid(tsec, secclass, &newsid); if (rc) return rc; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 600d0caebbd8..265077883f18 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5583,6 +5583,12 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * int bpf_mptcpify(int *family, int *type, int *protocol) + * Description + * Dynamically mptcpify a TCP socket as an MPTCP one. + * Return + * 0 on success. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5797,6 +5803,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(mptcpify, 212, ##ctx) \ /* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index cd0c42fff7c0..93767e441e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,8 +6,9 @@ #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" +#include "mptcpify.skel.h"
-#define NS_TEST "mptcp_ns" +char NS_TEST[32];
#ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 @@ -22,6 +23,26 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; };
+static struct nstoken *create_netns(void) +{ + srand(time(NULL)); + snprintf(NS_TEST, sizeof(NS_TEST), "mptcp_ns_%d", rand()); + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + return open_netns(NS_TEST); +fail: + return NULL; +} + +static void cleanup_netns(struct nstoken *nstoken) +{ + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST); +} + static int verify_tsk(int map_fd, int client_fd) { int err, cfd = client_fd; @@ -147,11 +168,8 @@ static void test_base(void) if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return;
- SYS(fail, "ip netns add %s", NS_TEST); - SYS(fail, "ip -net %s link set dev lo up", NS_TEST); - - nstoken = open_netns(NS_TEST); - if (!ASSERT_OK_PTR(nstoken, "open_netns")) + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) goto fail;
/* without MPTCP */ @@ -174,11 +192,101 @@ static void test_base(void) close(server_fd);
fail: - if (nstoken) - close_netns(nstoken); + cleanup_netns(nstoken); + + close(cgroup_fd); +} + +static void send_byte(int fd) +{ + char b = 0x55; + + ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte"); +} + +static int verify_mptcpify(void) +{ + char cmd[256]; + int err = 0; + + snprintf(cmd, sizeof(cmd), + "ip netns exec %s ss -tOni | grep -q '%s'", + NS_TEST, "tcp-ulp-mptcp"); + if (!ASSERT_OK(system(cmd), "No tcp-ulp-mptcp found!")) + err++; + + snprintf(cmd, sizeof(cmd), + "ip netns exec %s nstat -asz %s | awk '%s' | grep -q '%s'", + NS_TEST, "MPTcpExtMPCapableSYNACKRX", + "NR==1 {next} {print $2}", "1"); + if (!ASSERT_OK(system(cmd), "No MPTcpExtMPCapableSYNACKRX found!")) + err++; + + return err; +} + +static int run_mptcpify(int cgroup_fd) +{ + int server_fd, client_fd, prog_fd, err = 0; + struct mptcpify *mptcpify_skel; + + mptcpify_skel = mptcpify__open_and_load(); + if (!ASSERT_OK_PTR(mptcpify_skel, "mptcpify__open_and_load")) + return -EIO; + + prog_fd = bpf_program__fd(mptcpify_skel->progs.mptcpify); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) { + err = -EIO; + goto out; + }
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach alloc_prog_fd")) { + err = -EIO; + goto out; + }
+ /* without MPTCP */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) { + err = -EIO; + goto out; + } + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) { + err = -EIO; + goto close_server; + } + + send_byte(client_fd); + err += verify_mptcpify(); + + close(client_fd); +close_server: + close(server_fd); +out: + mptcpify__destroy(mptcpify_skel); + return err; +} + +static void test_mptcpify(void) +{ + struct nstoken *nstoken = NULL; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcpify"); + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) + return; + + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) + goto fail; + + ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify"); + +fail: + cleanup_netns(nstoken); close(cgroup_fd); }
@@ -186,4 +294,6 @@ void test_mptcp(void) { if (test__start_subtest("base")) test_base(); + if (test__start_subtest("mptcpify")) + test_mptcpify(); } diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c new file mode 100644 index 000000000000..58be68c021da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcpify.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023, SUSE. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +SEC("lsm_cgroup/socket_create") +int BPF_PROG(mptcpify, int *family, int *type, int *protocol, int kern) +{ + if (!kern) + bpf_mptcpify(family, type, protocol); + + return 1; +}