Make sure the IOAM data insertion is not applied on cloned skb's. As a consequence, ioam selftests needed a refactoring.
Justin Iurman (2): ioam6: fix write to cloned skb in ipv6_hop_ioam() selftests: ioam6: refactoring to align with the fix
net/ipv6/exthdrs.c | 8 ++ tools/testing/selftests/net/ioam6.sh | 38 ++++---- tools/testing/selftests/net/ioam6_parser.c | 101 +++++++++++---------- 3 files changed, 81 insertions(+), 66 deletions(-)
base-commit: 166c2c8a6a4dc2e4ceba9e10cfe81c3e469e3210
ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam().
Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace ") Reported-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Justin Iurman justin.iurman@uliege.be --- net/ipv6/exthdrs.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..f68e5faab3aa 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -943,6 +943,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb);
+ if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + } + ioam6_fill_trace_data(skb, ns, trace, true); break; default:
On Sat, 2024-02-17 at 00:43 +0100, Justin Iurman wrote:
ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam().
Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace ") Reported-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Justin Iurman justin.iurman@uliege.be
net/ipv6/exthdrs.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..f68e5faab3aa 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -943,6 +943,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb);
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto drop;
My personal preference would be for using skb_ensure_writable() here, with write_len == optoff + hdr->opt_len.
hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
Note that this can potentially change the network header ptr and the caller - ip6_parse_tlv() - has cached such value in 'nh'. You also need to update ip6_parse_tlv() to reload such pointer.
Side note: a bunch of self-tests are apparently stuck after this series. I think it's an unrelated problem. I'll try to have a better look.
Cheers,
Paolo
On 2/19/24 10:05, Paolo Abeni wrote:
On Sat, 2024-02-17 at 00:43 +0100, Justin Iurman wrote:
ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam().
Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace ") Reported-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Justin Iurman justin.iurman@uliege.be
net/ipv6/exthdrs.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..f68e5faab3aa 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -943,6 +943,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb);
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto drop;
My personal preference would be for using skb_ensure_writable() here, with write_len == optoff + hdr->opt_len.
OK, will do!
hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
Note that this can potentially change the network header ptr and the caller - ip6_parse_tlv() - has cached such value in 'nh'. You also need to update ip6_parse_tlv() to reload such pointer.
+1, I completely missed it, thanks!
Side note: a bunch of self-tests are apparently stuck after this series. I think it's an unrelated problem. I'll try to have a better look.
Can you share the config to observe such behavior? I'll try to investigate too.
Cheers,
Paolo
ioam6_parser uses a packet socket. After the fix to prevent writing to cloned skb's, the receiver does not see its IOAM data anymore, which makes input/forward ioam-selftests to fail. As a workaround, ioam6_parser now uses an IPv6 raw socket and leverages ancillary data to get hop-by-hop options. As a consequence, the hook is "after" the IOAM data insertion by the receiver and all tests are working again.
Signed-off-by: Justin Iurman justin.iurman@uliege.be --- tools/testing/selftests/net/ioam6.sh | 38 ++++---- tools/testing/selftests/net/ioam6_parser.c | 101 +++++++++++---------- 2 files changed, 73 insertions(+), 66 deletions(-)
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index fe59ca3e5596..12491850ae98 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -367,14 +367,12 @@ run_test() local desc=$2 local node_src=$3 local node_dst=$4 - local ip6_src=$5 - local ip6_dst=$6 - local if_dst=$7 - local trace_type=$8 - local ioam_ns=$9 - - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ - $trace_type $ioam_ns & + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 + + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & local spid=$! sleep 0.1
@@ -489,7 +487,7 @@ out_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -509,7 +507,7 @@ out_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -543,14 +541,14 @@ out_bits() if [ $cmd_res != 0 ] then npassed=$((npassed+1)) - log_test_passed "$descr" + log_test_passed "$descr ($1 mode)" else nfailed=$((nfailed+1)) - log_test_failed "$descr" + log_test_failed "$descr ($1 mode)" fi else run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 fi done
@@ -574,7 +572,7 @@ out_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 100 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -604,7 +602,7 @@ in_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -624,7 +622,7 @@ in_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -651,7 +649,7 @@ in_bits() dev veth0
run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 done
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -679,7 +677,7 @@ in_oflag() trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
@@ -703,7 +701,7 @@ in_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 80 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -731,7 +729,7 @@ fwd_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db01::2 db02::2 veth0 0xfff002 123 + db02::2 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d4190126..14b354e14d25 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,7 +8,6 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> -#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> #include <stdlib.h> @@ -512,14 +511,6 @@ static int str2id(const char *tname) return -1; }
-static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) -{ - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; -} - static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -603,70 +594,88 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1; - struct in6_addr src, dst; + int fd, size, hoplen, tid, ret = 1, on = 1; + __u8 buffer[512], is_encap; struct ioam6_hdr *opt; - struct ipv6hdr *ip6h; - __u8 buffer[400], *p; - __u16 ioam_ns; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr;
- if (argc != 7) + if (argc != 5) goto out;
- tid = str2id(argv[2]); + tid = str2id(argv[1]); if (tid < 0 || !func[tid]) goto out;
- if (inet_pton(AF_INET6, argv[3], &src) != 1 || - inet_pton(AF_INET6, argv[4], &dst) != 1) + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) goto out;
- if (get_u32(&tr_type, argv[5], 16) || - get_u16(&ioam_ns, argv[6], 0)) + if (!strcmp(argv[4], "inline")) + is_encap = 0; + else if (!strcmp(argv[4], "encap")) + is_encap = 1; + else goto out;
- fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) + fd = socket(PF_INET6, SOCK_RAW, + is_encap ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) goto out;
- if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - argv[1], strlen(argv[1]))) - goto close; + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on));
+ iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) + goto close; recv: - size = recv(fd, buffer, sizeof(buffer), 0); + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); if (size <= 0) goto close;
- ip6h = (struct ipv6hdr *)buffer; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue;
- if (!ipv6_addr_equal(&ip6h->saddr, &src) || - !ipv6_addr_equal(&ip6h->daddr, &dst)) - goto recv; + ptr = (__u8 *)CMSG_DATA(cmsg);
- if (ip6h->nexthdr != IPPROTO_HOPOPTS) - goto close; + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr);
- p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; - p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr;
- while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + }
- if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - p += sizeof(*opt); - ret = func[tid](tid, (struct ioam6_trace_hdr *)p, - tr_type, ioam_ns); - break; + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; } - - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; } + + goto recv; close: + free(iov.iov_base); close(fd); out: return ret;
linux-kselftest-mirror@lists.linaro.org