Re: [PATCH bpf-next v9 4/5] bpf: Add selftests for raw syncookie helpers

6 May 2022

On Tue, May 3, 2022 at 10:15 AM Maxim Mikityanskiy maximmi@nvidia.com wrote:
...
This commit adds selftests for the new BPF helpers:
bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6}.
xdp_synproxy_kern.c is a BPF program that generates SYN cookies on
allowed TCP ports and sends SYNACKs to clients, accelerating synproxy
iptables module.
xdp_synproxy.c is a userspace control application that allows to
configure the following options in runtime: list of allowed ports, MSS,
window scale, TTL.
A selftest is added to prog_tests that leverages the above programs to
test the functionality of the new helpers.
Signed-off-by: Maxim Mikityanskiy maximmi@nvidia.com
Reviewed-by: Tariq Toukan tariqt@nvidia.com

selftests should use "selftests/bpf: " subject prefix, not "bpf: ",
please update so it's more obvious that this patch touches selftests
and not kernel-side BPF functionality.
...
tools/testing/selftests/bpf/.gitignore        |   1 +
 tools/testing/selftests/bpf/Makefile          |   5 +-
 .../selftests/bpf/prog_tests/xdp_synproxy.c   | 109 +++
 .../selftests/bpf/progs/xdp_synproxy_kern.c   | 750 ++++++++++++++++++
 tools/testing/selftests/bpf/xdp_synproxy.c    | 418 ++++++++++
 5 files changed, 1281 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
 create mode 100644 tools/testing/selftests/bpf/xdp_synproxy.c

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 595565eb68c0..ca2f47f45670 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -43,3 +43,4 @@ test_cpp
 *.tmp
 xdpxceiver
 xdp_redirect_multi
+xdp_synproxy
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bafdc5373a13..8ae602843b16 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,9 +82,9 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
        test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \

  xdpxceiver xdp_redirect_multi




  xdpxceiver xdp_redirect_multi xdp_synproxy




-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/xdp_synproxy
# Emit succinct information message describing current building step
 # $1 - generic step name (e.g., CC, LINK, etc);
@@ -500,6 +500,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c      \
                         cap_helpers.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
                       $(OUTPUT)/liburandom_read.so                     \

                 $(OUTPUT)/xdp_synproxy                           \



this is the right way to make external binary available to test_progs
flavors, but is there anything inherently requiring external binary
instead of having a helper function doing the same? urandom_read has
to be a separate binary.
...
                   ima_setup.sh                                     \
                   $(wildcard progs/btf_dump_test_case_*.c)

TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..e08b28e25047
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>



+#define SYS(cmd) ({ \

  if (!ASSERT_OK(system(cmd), (cmd))) \


          goto out; \



+})



+#define SYS_OUT(cmd) ({ \

  FILE *f = popen((cmd), "r"); \


  if (!ASSERT_OK_PTR(f, (cmd))) \


          goto out; \


  f; \



+})



+static bool expect_str(char *buf, size_t size, const char *str)
+{

  if (size != strlen(str))


          return false;


  return !memcmp(buf, str, size);



+}



+void test_xdp_synproxy(void)
+{

  int server_fd = -1, client_fd = -1, accept_fd = -1;


  struct nstoken *ns = NULL;


  FILE *ctrl_file = NULL;


  char buf[1024];


  size_t size;



  SYS("ip netns add synproxy");



  SYS("ip link add tmp0 type veth peer name tmp1");


  SYS("ip link set tmp1 netns synproxy");


  SYS("ip link set tmp0 up");


  SYS("ip addr replace 198.18.0.1/24 dev tmp0");



...


  // When checksum offload is enabled, the XDP program sees wrong


  // checksums and drops packets.


  SYS("ethtool -K tmp0 tx off");


  // Workaround required for veth.



don't use C++ comments, please stick to /* */
...

  SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");



  ns = open_netns("synproxy");


  if (!ASSERT_OK_PTR(ns, "setns"))


          goto out;



  SYS("ip link set lo up");


  SYS("ip link set tmp1 up");


  SYS("ip addr replace 198.18.0.2/24 dev tmp1");


  SYS("sysctl -w net.ipv4.tcp_syncookies=2");


  SYS("sysctl -w net.ipv4.tcp_timestamps=1");


  SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");


  SYS("iptables -t raw -I PREROUTING \


      -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");


  SYS("iptables -t filter -A INPUT \


      -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \


      -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");


  SYS("iptables -t filter -A INPUT \


      -i tmp1 -m state --state INVALID -j DROP");



  ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 --single \


                      --mss4 1460 --mss6 1440 --wscale 7 --ttl 64");


  size = fread(buf, 1, sizeof(buf), ctrl_file);



buf is uninitialized so if fread fail strlen() can cause SIGSEGV or
some other failure mode
...

  pclose(ctrl_file);


  if (!ASSERT_TRUE(expect_str(buf, size, "Total SYNACKs generated: 0\n"),


                   "initial SYNACKs"))


          goto out;



  server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);


  if (!ASSERT_GE(server_fd, 0, "start_server"))


          goto out;



  close_netns(ns);


  ns = NULL;



  client_fd = connect_to_fd(server_fd, 10000);


  if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))


          goto out;



  accept_fd = accept(server_fd, NULL, NULL);


  if (!ASSERT_GE(accept_fd, 0, "accept"))


          goto out;



  ns = open_netns("synproxy");


  if (!ASSERT_OK_PTR(ns, "setns"))


          goto out;



  ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");


  size = fread(buf, 1, sizeof(buf), ctrl_file);


  pclose(ctrl_file);


  if (!ASSERT_TRUE(expect_str(buf, size, "Total SYNACKs generated: 1\n"),


                   "SYNACKs after connection"))



please use ASSERT_STREQ instead, same above
...

          goto out;




+out:

  if (accept_fd >= 0)


          close(accept_fd);


  if (client_fd >= 0)


          close(client_fd);


  if (server_fd >= 0)


          close(server_fd);


  if (ns)


          close_netns(ns);



  system("ip link del tmp0");


  system("ip netns del synproxy");



+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..9ae85b189072
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
Can you please elaborate on what Linux-OpenIB license is and why
GPL-2.0 isn't enough? We usually have GPL-2.0 or LGPL-2.1 OR
BSD-2-Clause
...
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */



+#include "vmlinux.h"



+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>



[...]
...



+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,

                                         __u32 len, __u8 proto,


                                         __u32 csum)



+{

  __u64 s = csum;



  s += (__u32)saddr;


  s += (__u32)daddr;



+#if defined(__BIG_ENDIAN__)

  s += proto + len;



+#elif defined(__LITTLE_ENDIAN__)
I've got few nudges in libbpf code base previously to use
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
instead (I don't remember the exact reason now, but there was a
reason). Let's do the same here for consistency?
...

  s += (proto + len) << 8;



+#else
+#error Unknown endian
+#endif

  s = (s & 0xffffffff) + (s >> 32);


  s = (s & 0xffffffff) + (s >> 32);



  return csum_fold((__u32)s);



+}



+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,

                                       const struct in6_addr *daddr,


                                       __u32 len, __u8 proto, __u32 csum)



+{

  __u64 sum = csum;


  int i;




+#pragma unroll

  for (i = 0; i < 4; i++)


          sum += (__u32)saddr->in6_u.u6_addr32[i];




+#pragma unroll
why unroll? BPF verifier handles such loops just fine, even if
compiler decides to not unroll them
...

  for (i = 0; i < 4; i++)


          sum += (__u32)daddr->in6_u.u6_addr32[i];



  // Don't combine additions to avoid 32-bit overflow.


  sum += bpf_htonl(len);


  sum += bpf_htonl(proto);



  sum = (sum & 0xffffffff) + (sum >> 32);


  sum = (sum & 0xffffffff) + (sum >> 32);



  return csum_fold((__u32)sum);



+}



+static __always_inline __u64 tcp_clock_ns(void)
__always_inline isn't mandatory, you can just have static __u64
tcp_clock_ns() here and let compiler decide on inlining? same for
below
...
+{

  return bpf_ktime_get_ns();



+}



+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{

  return ns / (NSEC_PER_SEC / TCP_TS_HZ);



+}



+static __always_inline __u32 tcp_time_stamp_raw(void)
+{

  return tcp_ns_to_ts(tcp_clock_ns());



+}



[...]
...
+static __always_inline void values_inc_synacks(void)
+{

  __u32 key = 1;


  __u32 *value;



  value = bpf_map_lookup_elem(&values, &key);


  if (value)


          __sync_fetch_and_add(value, 1);



+}



+static __always_inline bool check_port_allowed(__u16 port)
+{

  __u32 i;



  for (i = 0; i < MAX_ALLOWED_PORTS; i++) {


          __u32 key = i;


          __u16 *value;



          value = bpf_map_lookup_elem(&allowed_ports, &key);



          if (!value)


                  break;


          // 0 is a terminator value. Check it first to avoid matching on


          // a forbidden port == 0 and returning true.



please no C++ comments (everywhere)
...

          if (*value == 0)


                  break;



          if (*value == port)


                  return true;


  }



  return false;



+}



[...]

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH bpf-next v9 4/5] bpf: Add selftests for raw syncookie helpers