On Fri, Aug 25, 2023 at 11:08:30AM +0200, Sriram Yagnaraman wrote:
Create a topology with a host, and a router. The host (veth0) is in the default namespace, and a network namespace is created for the router, the peer veth1 is moved to the router netns. A dummy interface is added inside the router netns, to simulate a network that has two neighbors. An ECMP route to a virtual IP (vip) with the two neighbors as the next hop is added.
The test uses perf stat to count the number of fib:fib_table_lookup tracepoint hits for IPv4 and the number of fib6:fib6_table_lookup for IPv6. The measured count is checked to be within 15% for the number of packets received on veth1 in the router.
See diagram in the test for more information.
Suggested-by: Ido Schimmel idosch@nvidia.com Signed-off-by: Sriram Yagnaraman sriram.yagnaraman@est.tech
.../testing/selftests/net/forwarding/Makefile | 1 + tools/testing/selftests/net/forwarding/lib.sh | 5 + .../net/forwarding/router_multipath_vip.sh | 255 ++++++++++++++++++
I believe this test better fits in fib_tests.sh. Here's a diff that uses the previously mentioned method [1]. I verified that it fails without the fixes and passes with them:
Before:
IPv4 multipath list receive tests TEST: Multipath route hit ratio (.15) [FAIL] TEST: Single path route hit ratio (.15) [ OK ]
IPv6 multipath list receive tests TEST: Multipath route hit ratio (.15) [FAIL] TEST: Single path route hit ratio (.14) [ OK ]
After:
IPv4 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] TEST: Single path route hit ratio (.15) [ OK ]
IPv6 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] TEST: Single path route hit ratio (.14) [ OK ]
[1] diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 35d89dfa6f11..5ffbfdad943c 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,7 @@ ret=0 ksft_skip=4
# all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh" +TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh ipv4_mpath_list ipv6_mpath_list"
VERBOSE=0 PAUSE_ON_FAIL=no @@ -2138,6 +2138,143 @@ ipv4_bcast_neigh_test() cleanup }
+mpath_dep_check() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "mausezahn command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v bc)" ]; then + echo "bc command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v perf)" ]; then + echo "perf command not found. Skipping test" + return 1 + fi + + perf list fib:* | grep -q fib_table_lookup + if [ $? -ne 0 ]; then + echo "IPv4 FIB tracepoint not found. Skipping test" + return 1 + fi + + perf list fib6:* | grep -q fib6_table_lookup + if [ $? -ne 0 ]; then + echo "IPv6 FIB tracepoint not found. Skipping test" + return 1 + fi + + return 0 +} + +list_rcv_eval() +{ + local name=$1; shift + local file=$1; shift + local exp=$1; shift + + + local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor') + local ratio=$(echo "scale=2; $count / 65536" | bc -l) + local res=$(echo "$ratio $exp" | bc) + [[ $res -eq 1 ]] + log_test $? 0 "$name route hit ratio ($ratio)" +} + +ipv4_mpath_list_test() +{ + echo + echo "IPv4 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec ns2 bash -c "echo 20000 > /sys/class/net/veth2/gro_flush_timeout"" + run_cmd "ip netns exec ns2 bash -c "echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs"" + run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n ns2 link add name nh1 up type dummy" + run_cmd "ip -n ns2 link add name nh2 up type dummy" + run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + set +e + + local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" + list_rcv_eval "Multipath" $tmp_file ">= 0.95" + + # The same is not true for a single path route. + run_cmd "ip -n ns2 route replace 203.0.113.0/24 nexthop via 172.16.201.2" + run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" + list_rcv_eval "Single path" $tmp_file "< 0.95" + + rm $tmp_file + route_cleanup +} + +ipv6_mpath_list_test() +{ + echo + echo "IPv6 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec ns2 bash -c "echo 20000 > /sys/class/net/veth2/gro_flush_timeout"" + run_cmd "ip netns exec ns2 bash -c "echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs"" + run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n ns2 link add name nh1 up type dummy" + run_cmd "ip -n ns2 link add name nh2 up type dummy" + run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + set +e + + local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" + list_rcv_eval "Multipath" $tmp_file ">= 0.95" + + # The same is not true for a single path route. + run_cmd "ip -n ns2 route replace 2001:db8:301::/64 nexthop via 2001:db8:201::2" + run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" + list_rcv_eval "Single path" $tmp_file "< 0.95" + + rm $tmp_file + route_cleanup +} + ################################################################################ # usage
@@ -2217,6 +2354,8 @@ do ipv4_mangle) ipv4_mangle_test;; ipv6_mangle) ipv6_mangle_test;; ipv4_bcast_neigh) ipv4_bcast_neigh_test;; + ipv4_mpath_list) ipv4_mpath_list_test;; + ipv6_mpath_list) ipv6_mpath_list_test;;
help) echo "Test names: $TESTS"; exit 0;; esac