On Mon, Apr 1, 2019 at 11:33 AM Alan Maguire alan.maguire@oracle.com wrote:
Update test_tc_tunnel to verify adding inner L2 header encapsulation (an MPLS label) works.
Signed-off-by: Alan Maguire alan.maguire@oracle.com
@@ -84,23 +95,39 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto) return TC_ACT_OK;
olen = sizeof(h_outer.ip);
elen = 0;
nit: could you pick a (slightly) more descriptive name? I don't get what e is an abbreviation for here.
flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
if (l2_proto == ETH_P_MPLS_UC) {
elen = sizeof(mpls_label);
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(elen);
}
switch (encap_proto) { case IPPROTO_GRE:
To verify that this L2 encap method is generic, it would be useful to also test IPPROTO_GRE + ETH_P_TEB, if that's not too much work to add.
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE | BPF_F_ADJ_ROOM_FIXED_GSO; olen += sizeof(h_outer.l4hdr.gre);
h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IP);
h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); h_outer.l4hdr.gre.flags = 0; break; case IPPROTO_UDP: flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; olen += sizeof(h_outer.l4hdr.udp);
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst); h_outer.l4hdr.udp.check = 0; h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
sizeof(h_outer.l4hdr.udp));
sizeof(h_outer.l4hdr.udp) +
elen);
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
switch (l2_proto) {
case ETH_P_IP:
dst = cfg_udp_dst;
break;
case ETH_P_MPLS_UC:
dst = cfg_mplsudp_dst;
break;
}
h_outer.l4hdr.udp.dest = bpf_htons(dst);
nit: more concise:
h_outer.l4hdr.udp.dest = bpf_htons(l2_proto == ETH_P_IP ? cfg_udp_dst : cfg_mplsudp_dst);
break; case IPPROTO_IPIP: break;
@@ -108,6 +135,13 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto) return TC_ACT_OK; }
/* add L2 encap (if specified) */
if (l2_proto == ETH_P_MPLS_UC)
__builtin_memcpy((__u8 *)&h_outer + olen, &mpls_label,
sizeof(mpls_label));
nit: no need for memcpy, can cast to __u32 and use regular integer assignment.
olen += elen;
/* add room between mac and network header */ if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) return TC_ACT_SHOT;
@@ -124,18 +158,19 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto) if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, BPF_F_INVALIDATE_HASH) < 0) return TC_ACT_SHOT;
nit: irrelevant change
-if [[ "$#" -ne "3" ]]; then +if [[ "$#" -ne "4" ]]; then echo "Usage: $0"
echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls> <data_len>" exit 1
fi
@@ -148,9 +150,10 @@ case "$1" in esac
readonly tuntype=$2 -readonly datalen=$3 +readonly mactype=$3 +readonly datalen=$4
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}" +echo "encap ${addr1} to ${addr2}, tun ${tuntype} mac ${mactype} len ${datalen}"
trap cleanup EXIT
@@ -167,7 +170,7 @@ verify_data ip netns exec "${ns1}" tc qdisc add dev veth1 clsact ip netns exec "${ns1}" tc filter add dev veth1 egress \ bpf direct-action object-file ./test_tc_tunnel.o \
section "encap_${tuntype}"
section "encap_${tuntype}_${mactype}"
echo "test bpf encap without decap (expect failure)" server_listen ! client_connect @@ -176,11 +179,11 @@ server_listen # server is still running # client can connect again
-# Skip tunnel tests for ip6udp. For IPv6, a UDP checksum is required -# and there seems to be no way to tell a fou6 tunnel to allow 0 -# checksums. Accordingly for both these cases, we skip tests against -# tunnel peer, and test encap using BPF decap only. -if [[ "$tuntype" != "ip6udp" ]]; then +# Skip tunnel tests for L2 encap and ip6udp. For IPv6, a UDP checksum +# is required and there seems to be no way to tell a fou6 tunnel to +# allow 0 checksums.
Please update comment for L2 encap: why can we not test with a device for MPLS decap? I am not suggesting adding it if complex. But note that I added the tunnel device decap before the bpf decap especially to have some verification that our logic matches that generated by real tunnel devices. With our own BPF code on both sides, that cannot be tested.