It is possible to create cycles using bpf_redirect_peer which lead to an infinite loop inside __netif_receive_skb_core. The simplest way to illustrate this is by attaching a TC program to the ingress hook on both sides of a veth or netkit device pair which redirects to its own peer, although other cycles are possible. This patch places an upper limit on the number of iterations allowed inside __netif_receive_skb_core to prevent this.
Signed-off-by: Jordan Rife <jrife@google.com>
Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
Cc: stable@vger.kernel.org
---
 net/core/dev.c                                     | 11 +++-
 net/core/dev.h                                     |  1 +
 .../selftests/bpf/prog_tests/tc_redirect.c         | 51 +++++++++++++++++++
 .../selftests/bpf/progs/test_tc_peer.c             | 13 +++++
 4 files changed, 75 insertions(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c index cd479f5f22f6..753f8d27f47c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5455,6 +5455,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct net_device *orig_dev; bool deliver_exact = false; int ret = NET_RX_DROP; + int loops = 0; __be16 type;
net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb); @@ -5521,8 +5522,16 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, nf_skip_egress(skb, true); skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, &another); - if (another) + if (another) { + loops++; + if (unlikely(loops == RX_LOOP_LIMIT)) { + ret = NET_RX_DROP; + net_crit_ratelimited("bpf: loop limit reached on datapath, buggy bpf program?\n"); + goto out; + } + goto another_round; + } if (!skb) goto out;
diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..28d1cf2f9069 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -150,6 +150,7 @@ struct napi_struct *napi_by_id(unsigned int napi_id); void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8 +#define RX_LOOP_LIMIT 8
#ifndef CONFIG_PREEMPT_RT static inline bool dev_xmit_recursion(void) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index c85798966aec..db1b36090d6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -1081,6 +1081,55 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) close_netns(nstoken); }
+static void test_tc_redirect_peer_loop(struct netns_setup_result *setup_result) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src); + struct test_tc_peer *skel; + struct nstoken *nstoken; + int err; + + /* Set up an infinite redirect loop using bpf_redirect_peer with ingress + * hooks on either side of a veth/netkit pair redirecting to its own + * peer. This should not lock up the kernel. + */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + return; + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto done; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src; + skel->rodata->IFINDEX_SRC_FWD = setup_result->ifindex_src_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto done; + + QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.tc_src_self, 0); + close_netns(nstoken); + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.tc_src_fwd_self, 0); + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + + SYS_NOFAIL("ip netns exec " NS_SRC " %s -c 1 -W 1 -q %s > /dev/null", + ping_command(AF_INET), IP4_DST); +fail: +done: + if (skel) + test_tc_peer__destroy(skel); + close_netns(nstoken); +} + static int tun_open(char *name) { struct ifreq ifr; @@ -1280,6 +1329,8 @@ static void *test_tc_redirect_run_tests(void *arg) RUN_TEST(tc_redirect_peer, MODE_VETH); RUN_TEST(tc_redirect_peer, MODE_NETKIT); RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_peer_loop, MODE_VETH); + RUN_TEST(tc_redirect_peer_loop, MODE_NETKIT); RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); RUN_TEST(tc_redirect_neigh, MODE_VETH); RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); diff --git 
a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index 365eacb5dc34..9b8a00ccad42 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -10,6 +10,7 @@
#include <bpf/bpf_helpers.h>
+volatile const __u32 IFINDEX_SRC_FWD; volatile const __u32 IFINDEX_SRC; volatile const __u32 IFINDEX_DST;
@@ -34,6 +35,18 @@ int tc_src(struct __sk_buff *skb) return bpf_redirect_peer(IFINDEX_DST, 0); }
+SEC("tc") +int tc_src_self(struct __sk_buff *skb) +{ + return bpf_redirect_peer(IFINDEX_SRC, 0); +} + +SEC("tc") +int tc_src_fwd_self(struct __sk_buff *skb) +{ + return bpf_redirect_peer(IFINDEX_SRC_FWD, 0); +} + SEC("tc") int tc_dst_l3(struct __sk_buff *skb) {