Add selftest for the IPv6 fragmentation regression which affected several stable kernels.
Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE).
Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE.
Signed-off-by: Brett A C Sheffield bacs@librecast.net Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 204 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..f83f91b758ae 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation
# YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..21e1a3cdc63d --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield bacs@librecast.net + * + * Kernel selftest for the IPv6 fragmentation regression which affected + * stable kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: + * a18dfa9925b9 ("ipv6: save dontfrag in cork") + * was backported to stable without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue. sendmsg returns correctly (8192) + * on a working kernel, and returns -1 (EMSGSIZE) when the regression is + * present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +/* ensure MTU is smaller than what we plan to send */ +static int set_mtu(int ctl, char *ifname, struct ifreq *ifr) +{ + ifr->ifr_mtu = MTU; + return ioctl(ctl, SIOCSIFMTU, ifr); +} + +/* bring up interface */ +static int interface_up(int ctl, char *ifname, struct ifreq *ifr) +{ + if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) { + perror("ioctl SIOCGIFFLAGS"); + return -1; + } + ifr->ifr_flags = ifr->ifr_flags | IFF_UP; + return ioctl(ctl, SIOCSIFFLAGS, ifr); +} + +/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{ + char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad"; + char fname[IFNAMSIZ + sizeof(sysvar)]; + int fd; + + snprintf(fname, sizeof(fname), sysvar, ifname); + fd = open(fname, O_WRONLY); + if (fd == -1) { + perror("open accept_dad"); + return -1; + } + if (write(fd, "0", 1) != 1) { + perror("write"); + return -1; + } + return close(fd); +} + +/* create TAP interface that will be deleted when this process exits */ +static int create_interface(int ctl, char *ifname, struct ifreq *ifr) +{ + int fd; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) { + perror("open tun"); + return -1; + } + + ifr->ifr_flags = IFF_TAP | IFF_NO_PI; + if (ioctl(fd, TUNSETIFF, (void *)ifr) == -1) { + close(fd); + perror("ioctl: TUNSETIFF"); + return -1; + } + strcpy(ifname, ifr->ifr_name); + + return fd; +} + +/* we need to set MTU, so do this in a namespace to play nicely */ +static int create_namespace(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + perror("unshare"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd == -1) { + perror("open"); + return -1; + } + + if (setns(fd, CLONE_NEWNET)) { + perror("setns"); + return -1; + } + + return 0; +} + +static int setup(void) +{ + struct ifreq ifr = {0}; + char ifname[IFNAMSIZ]; + int fd = -1; + int ctl; + + if (create_namespace() == -1) + return -1; + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + return -1; + + memset(ifname, 0, sizeof(ifname)); + fd = create_interface(ctl, ifname, &ifr); + if (fd == -1) + goto err_close_ctl; + if (disable_dad(ifname) == -1) + goto err_close_fd; + if (interface_up(ctl, ifname, &ifr) == -1) + goto err_close_fd; + if (set_mtu(ctl, ifname, &ifr) == -1) + goto err_close_fd; + usleep(10000); /* give interface a moment to wake up */ + goto err_close_ctl; +err_close_fd: + close(fd); + fd = -1; +err_close_ctl: + close(ctl); + return fd; +} + +int main(void) +{ + /* address doesn't matter, use an IPv6 multicast address for simplicity */ + struct in6_addr addr = { + .s6_addr[0] = 0xff, /* multicast */ + .s6_addr[1] = 0x12, /* set flags (T, link-local) */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 4242 + }; + char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int ns_fd; + int s; + + printf("Testing IPv6 fragmentation\n"); + ns_fd = setup(); + if (ns_fd == -1) + return 1; + s = socket(AF_INET6, SOCK_DGRAM, 0); + msg.msg_name = (struct sockaddr *)&sa; + msg.msg_namelen = sizeof(sa); + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + perror("send"); + return 1; + } else if (rc != LARGER_THAN_MTU) { + fprintf(stderr, "send() returned %zi\n", rc); + return 1; + } + close(s); + close(ns_fd); + + return 0; +}
Brett A C Sheffield wrote:
Add selftest for the IPv6 fragmentation regression which affected several stable kernels.
Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE).
Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE.
Signed-off-by: Brett A C Sheffield bacs@librecast.net Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com
Thanks for adding a regression test for this.
tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 204 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..f83f91b758ae 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..21e1a3cdc63d --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/*
- Author: Brett A C Sheffield bacs@librecast.net
- Kernel selftest for the IPv6 fragmentation regression which affected
- stable kernels:
- Commit:
- a18dfa9925b9 ("ipv6: save dontfrag in cork")
- was backported to stable without some prerequisite commits.
- This caused a regression when sending IPv6 UDP packets by preventing
- fragmentation and instead returning -1 (EMSGSIZE).
- This selftest demonstrates the issue. sendmsg returns correctly (8192)
- on a working kernel, and returns -1 (EMSGSIZE) when the regression is
- present.
- The regression was not present in the mainline kernel, but add this test to
- catch similar breakage in future.
- */
+#define _GNU_SOURCE
+#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h>
+#define MTU 1500 +#define LARGER_THAN_MTU 8192
+/* ensure MTU is smaller than what we plan to send */ +static int set_mtu(int ctl, char *ifname, struct ifreq *ifr) +{
- ifr->ifr_mtu = MTU;
- return ioctl(ctl, SIOCSIFMTU, ifr);
+}
+/* bring up interface */ +static int interface_up(int ctl, char *ifname, struct ifreq *ifr) +{
- if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) {
perror("ioctl SIOCGIFFLAGS");
return -1;
- }
- ifr->ifr_flags = ifr->ifr_flags | IFF_UP;
- return ioctl(ctl, SIOCSIFFLAGS, ifr);
+}
+/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{
- char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad";
- char fname[IFNAMSIZ + sizeof(sysvar)];
- int fd;
- snprintf(fname, sizeof(fname), sysvar, ifname);
- fd = open(fname, O_WRONLY);
- if (fd == -1) {
perror("open accept_dad");
return -1;
- }
- if (write(fd, "0", 1) != 1) {
perror("write");
return -1;
- }
- return close(fd);
+}
+/* create TAP interface that will be deleted when this process exits */ +static int create_interface(int ctl, char *ifname, struct ifreq *ifr) +{
- int fd;
- fd = open("/dev/net/tun", O_RDWR);
- if (fd == -1) {
perror("open tun");
return -1;
- }
- ifr->ifr_flags = IFF_TAP | IFF_NO_PI;
- if (ioctl(fd, TUNSETIFF, (void *)ifr) == -1) {
close(fd);
perror("ioctl: TUNSETIFF");
return -1;
- }
- strcpy(ifname, ifr->ifr_name);
- return fd;
+}
+/* we need to set MTU, so do this in a namespace to play nicely */ +static int create_namespace(void) +{
- const char *netns_path = "/proc/self/ns/net";
- int fd;
- if (unshare(CLONE_NEWNET) != 0) {
perror("unshare");
return -1;
- }
Is this not sufficient to move the current process in its own netns?
- fd = open(netns_path, O_RDONLY);
- if (fd == -1) {
perror("open");
return -1;
- }
- if (setns(fd, CLONE_NEWNET)) {
perror("setns");
return -1;
- }
- return 0;
+}
+static int setup(void) +{
- struct ifreq ifr = {0};
- char ifname[IFNAMSIZ];
- int fd = -1;
- int ctl;
- if (create_namespace() == -1)
return -1;
- ctl = socket(AF_LOCAL, SOCK_STREAM, 0);
- if (ctl == -1)
return -1;
- memset(ifname, 0, sizeof(ifname));
- fd = create_interface(ctl, ifname, &ifr);
- if (fd == -1)
goto err_close_ctl;
- if (disable_dad(ifname) == -1)
goto err_close_fd;
- if (interface_up(ctl, ifname, &ifr) == -1)
goto err_close_fd;
- if (set_mtu(ctl, ifname, &ifr) == -1)
goto err_close_fd;
- usleep(10000); /* give interface a moment to wake up */
This may be racy. Wait on a more explicit signal? E.g., /sys/class/net/$DEV/operstate.
- goto err_close_ctl;
+err_close_fd:
- close(fd);
- fd = -1;
+err_close_ctl:
- close(ctl);
- return fd;
+}
+int main(void) +{
- /* address doesn't matter, use an IPv6 multicast address for simplicity */
- struct in6_addr addr = {
.s6_addr[0] = 0xff, /* multicast */
.s6_addr[1] = 0x12, /* set flags (T, link-local) */
- };
- struct sockaddr_in6 sa = {
.sin6_family = AF_INET6,
.sin6_addr = addr,
.sin6_port = 4242
- };
- char buf[LARGER_THAN_MTU] = {0};
- struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)};
- struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_name = (struct sockaddr *)&sa,
.msg_namelen = sizeof(sa),
- };
- ssize_t rc;
- int ns_fd;
- int s;
- printf("Testing IPv6 fragmentation\n");
- ns_fd = setup();
- if (ns_fd == -1)
return 1;
- s = socket(AF_INET6, SOCK_DGRAM, 0);
- msg.msg_name = (struct sockaddr *)&sa;
- msg.msg_namelen = sizeof(sa);
nit: duplicate?
Also, no local address is set. This uses the IPv6 auto assigned address?
- rc = sendmsg(s, &msg, 0);
- if (rc == -1) {
perror("send");
return 1;
Probably want to cleanup state both on success and failure.
Could use KSFT_.. exit codes, though 0/1 works just as well for kselftests in practice.
- } else if (rc != LARGER_THAN_MTU) {
fprintf(stderr, "send() returned %zi\n", rc);
return 1;
- }
- close(s);
- close(ns_fd);
- return 0;
+}
2.49.1
Thanks for the review, Willem.
On 2025-08-25 12:16, Willem de Bruijn wrote:
Brett A C Sheffield wrote:
Add selftest for the IPv6 fragmentation regression which affected several stable kernels.
Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE).
Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE.
Signed-off-by: Brett A C Sheffield bacs@librecast.net Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com
Thanks for adding a regression test for this.
No problem. I wrote a test for myself when bisecting the problem back in June - makes sense to convert it to a selftest.
+/* we need to set MTU, so do this in a namespace to play nicely */ +static int create_namespace(void) +{
- const char *netns_path = "/proc/self/ns/net";
- int fd;
- if (unshare(CLONE_NEWNET) != 0) {
perror("unshare");
return -1;
- }
Is this not sufficient to move the current process in its own netns?
Yes. Yes it is. Apparently I did not read the man page properly.
- fd = open(netns_path, O_RDONLY);
- if (fd == -1) {
perror("open");
return -1;
- }
- if (setns(fd, CLONE_NEWNET)) {
perror("setns");
return -1;
- }
- return 0;
+}
+static int setup(void) +{
- struct ifreq ifr = {0};
- char ifname[IFNAMSIZ];
- int fd = -1;
- int ctl;
- if (create_namespace() == -1)
return -1;
- ctl = socket(AF_LOCAL, SOCK_STREAM, 0);
- if (ctl == -1)
return -1;
- memset(ifname, 0, sizeof(ifname));
- fd = create_interface(ctl, ifname, &ifr);
- if (fd == -1)
goto err_close_ctl;
- if (disable_dad(ifname) == -1)
goto err_close_fd;
- if (interface_up(ctl, ifname, &ifr) == -1)
goto err_close_fd;
- if (set_mtu(ctl, ifname, &ifr) == -1)
goto err_close_fd;
- usleep(10000); /* give interface a moment to wake up */
This may be racy. Wait on a more explicit signal? E.g., /sys/class/net/$DEV/operstate.
Good thinking. I'll try that.
- struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_name = (struct sockaddr *)&sa,
.msg_namelen = sizeof(sa),
- };
- ssize_t rc;
- int ns_fd;
- int s;
- printf("Testing IPv6 fragmentation\n");
- ns_fd = setup();
- if (ns_fd == -1)
return 1;
- s = socket(AF_INET6, SOCK_DGRAM, 0);
- msg.msg_name = (struct sockaddr *)&sa;
- msg.msg_namelen = sizeof(sa);
nit: duplicate?
Well spotted. Will fix.
Also, no local address is set. This uses the IPv6 auto assigned address?
Correct. The test sends to a link-local scope multicast group from the autoconf link-local address. I'll clarify that in the comments at the top of the test.
- rc = sendmsg(s, &msg, 0);
- if (rc == -1) {
perror("send");
return 1;
Probably want to cleanup state both on success and failure.
Ack.
Could use KSFT_.. exit codes, though 0/1 works just as well for kselftests in practice.
Ok.
- } else if (rc != LARGER_THAN_MTU) {
fprintf(stderr, "send() returned %zi\n", rc);
return 1;
- }
- close(s);
- close(ns_fd);
- return 0;
+}
2.49.1
Thanks again - expect a v2 when I have that cleaned up and re-tested.
Cheers,
Brett --
Add selftest for the IPv6 fragmentation regression which affected several stable kernels.
Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE).
Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE.
Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com Signed-off-by: Brett A C Sheffield bacs@librecast.net --- Willem: Thanks for the suggestion to check /sys/class/net/$DEV/operstate
I did try this but, if I call unshare() and THEN create a TAP interface in the new namespace, operstate no longer seems to be visible to the process?
The process can still read operstate for interfaces in the shared namespace, but not ones created in the unshare()d namespace.
I'm sure I'm doing something wrong there, but after trying a few different things including reading operstate from netlink it suddenly occurred to me that a simpler and more reliable way to check whether an interface is ready to send is to, er, send, and then handle the error for the (unlikely) failure case and retry.
I've incorporated your other review suggestions in this v2. Many thanks.
v2 changes: - remove superfluous namespace calls - unshare(2) suffices - remove usleep(). Don't wait for the interface to be ready, just send, and handle the (less likely) error case by retrying. - set destination address only once - document our use of the IPv6 link-local source address - send to port 9 (DISCARD) instead of 4242 (DONT PANIC) - ensure sockets are closed on failure paths - use KSFT exit codes for clarity
v1: https://lore.kernel.org/netdev/20250825092548.4436-3-bacs@librecast.net
tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 189 ++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eef0b8f8a7b0..276e0481d996 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation TEST_PROGS += route_hint.sh
# YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..4ba16bf56a32 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield bacs@librecast.net + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet from + * the autoconfigured link-local address to an arbritrary multicast group. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +/* ensure MTU is smaller than what we plan to send */ +static int set_mtu(int ctl, struct ifreq *ifr) +{ + ifr->ifr_mtu = MTU; + return ioctl(ctl, SIOCSIFMTU, ifr); +} + +/* bring up interface */ +static int interface_up(int ctl, struct ifreq *ifr) +{ + if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) { + perror("ioctl SIOCGIFFLAGS"); + return -1; + } + ifr->ifr_flags = ifr->ifr_flags | IFF_UP; + return ioctl(ctl, SIOCSIFFLAGS, ifr); +} + +/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{ + char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad"; + char fname[IFNAMSIZ + sizeof(sysvar)]; + int fd; + + snprintf(fname, sizeof(fname), sysvar, ifname); + fd = open(fname, O_WRONLY); + if (fd == -1) { + perror("open accept_dad"); + return -1; + } + if (write(fd, "0", 1) != 1) { + perror("write"); + return -1; + } + return close(fd); +} + +/* create TAP interface that will be deleted when this process exits */ +static int create_interface(char *ifname, struct ifreq *ifr) +{ + int fd; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) { + perror("open tun"); + return -1; + } + + ifr->ifr_flags = IFF_TAP | IFF_NO_PI; + if (ioctl(fd, TUNSETIFF, (void *)ifr) == -1) { + close(fd); + perror("ioctl: TUNSETIFF"); + return -1; + } + strcpy(ifname, ifr->ifr_name); + + return fd; +} + +static int setup(void) +{ + struct ifreq ifr = {0}; + char ifname[IFNAMSIZ]; + int fd = -1; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + return -1; + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + return -1; + + memset(ifname, 0, sizeof(ifname)); + fd = create_interface(ifname, &ifr); + if (fd == -1) + goto err_close_ctl; + if (disable_dad(ifname) == -1) + goto err_close_fd; + if (interface_up(ctl, &ifr) == -1) + goto err_close_fd; + if (set_mtu(ctl, &ifr) == -1) + goto err_close_fd; + goto err_close_ctl; +err_close_fd: + close(fd); + fd = -1; +err_close_ctl: + close(ctl); + return fd; +} + +int main(void) +{ + /* destination doesn't matter, use an IPv6 link-local multicast group */ + struct in6_addr addr = { + .s6_addr[0] = 0xff, /* multicast */ + .s6_addr[1] = 0x12, /* set flags (T, link-local) */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 9 /* port 9/udp (DISCARD) */ + }; + char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int ns_fd; + int err = KSFT_FAIL; + int s; + + printf("Testing IPv6 fragmentation\n"); + ns_fd = setup(); + if (ns_fd == -1) { + printf("[FAIL] test setup failed\n"); + return KSFT_FAIL; + } + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) + goto send_again; + printf("[FAIL] sendmsg: %s\n", strerror(errno)); + goto err_close_socket; + } else if (rc != LARGER_THAN_MTU) { + printf("[FAIL] sendmsg() returned %zi\n", rc); + goto err_close_socket; + } + printf("[PASS] sendmsg() returned %zi\n", rc); + err = KSFT_PASS; + +err_close_socket: + close(s); + close(ns_fd); + return err; +}
linux-kselftest-mirror@lists.linaro.org