From: Vegard Nossum vegard.nossum@oracle.com
This adds some basic self-testing infrastructure for RDS-TCP.
Signed-off-by: Vegard Nossum vegard.nossum@oracle.com Signed-off-by: Chuck Lever chuck.lever@oracle.com Signed-off-by: Allison Henderson allison.henderson@oracle.com --- Documentation/dev-tools/gcov.rst | 11 + MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/rds/Makefile | 12 + tools/testing/selftests/net/rds/README.txt | 41 ++++ tools/testing/selftests/net/rds/config.sh | 53 +++++ tools/testing/selftests/net/rds/run.sh | 224 ++++++++++++++++++ tools/testing/selftests/net/rds/test.py | 262 +++++++++++++++++++++ 8 files changed, 605 insertions(+)
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst index 5fce2b06f229..dbd26b02ff3c 100644 --- a/Documentation/dev-tools/gcov.rst +++ b/Documentation/dev-tools/gcov.rst @@ -75,6 +75,17 @@ Only files which are linked to the main kernel image or are compiled as kernel modules are supported by this mechanism.
+Module specific configs +----------------------- + +Gcov kernel configs for specific modules are described below: + +CONFIG_GCOV_PROFILE_RDS: + Enables GCOV profiling on RDS for checking which functions or + lines are executed. This config is used by the RDS selftest to + generate coverage reports. If left unset, the report is omitted. + + Files -----
diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e..8e0324b964f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19195,6 +19195,7 @@ S: Supported W: https://oss.oracle.com/projects/rds/ F: Documentation/networking/rds.rst F: net/rds/ +F: tools/testing/selftests/net/rds/
RDT - RESOURCE ALLOCATION M: Fenghua Yu fenghua.yu@intel.com diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bc8fe9e8f7f2..a5f1c0c27dff 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -68,6 +68,7 @@ TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += net/tcp_ao TARGETS += net/netfilter +TARGETS += net/rds TARGETS += nsfs TARGETS += perf_events TARGETS += pidfd diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile new file mode 100644 index 000000000000..da9714bc7aad --- /dev/null +++ b/tools/testing/selftests/net/rds/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +all: + @echo mk_build_dir="$(shell pwd)" > include.sh + +TEST_PROGS := run.sh \ + include.sh \ + test.py + +EXTRA_CLEAN := /tmp/rds_logs + +include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt new file mode 100644 index 000000000000..cbde2951ab13 --- /dev/null +++ b/tools/testing/selftests/net/rds/README.txt @@ -0,0 +1,41 @@ +RDS self-tests +============== + +These scripts provide a coverage test for RDS-TCP by creating two +network namespaces and sending RDS packets between them. A loopback +network is provisioned with an optional probability of packet loss or +corruption. A workload of 50000 hashes, each 64 characters in size, +is passed over an RDS socket on this test network. A passing test means +the RDS-TCP stack was able to recover properly. The provided config.sh +can be used to compile the kernel with the necessary gcov options. The +kernel may optionally be configured to omit the coverage report as well. + +USAGE: + run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] + [-u packet_duplicate] + +OPTIONS: + -d Log directory. 
Defaults to tools/testing/selftests/net/rds/rds_logs + + -l Simulates a percentage of packet loss + + -c Simulates a percentage of packet corruption + + -u Simulates a percentage of packet duplication. + +EXAMPLE: + + # Create a suitable gcov-enabled .config + tools/testing/selftests/net/rds/config.sh -g + + # Alternatively, create a gcov-disabled .config + tools/testing/selftests/net/rds/config.sh + + # build the kernel + vng --build --config tools/testing/selftests/net/config + + # launch the tests in a VM + vng -v --rwdir ./ --run . --user root --cpus 4 -- \ + "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + +An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/. diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh new file mode 100755 index 000000000000..791c8dbe1095 --- /dev/null +++ b/tools/testing/selftests/net/rds/config.sh @@ -0,0 +1,53 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -x + +unset KBUILD_OUTPUT + +GENERATE_GCOV_REPORT=0 +while getopts "g" opt; do + case ${opt} in + g) + GENERATE_GCOV_REPORT=1 + ;; + :) + echo "USAGE: config.sh [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." 
+ exit 1 + ;; + esac +done + +CONF_FILE="tools/testing/selftests/net/config" + +# no modules +scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES + +# enable RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + # instrument RDS and only RDS + scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS +else + scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS +fi + +# need network namespaces to run tests with veth network interfaces +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS +scripts/config --file "$CONF_FILE" --enable CONFIG_VETH + +# simulate packet loss +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM + diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh new file mode 100755 index 000000000000..8aee244f582a --- /dev/null +++ b/tools/testing/selftests/net/rds/run.sh @@ -0,0 +1,224 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +unset KBUILD_OUTPUT + +current_dir="$(realpath "$(dirname "$0")")" +build_dir="$current_dir" + +build_include="$current_dir/include.sh" +if test -f "$build_include"; then + # this include will define "$mk_build_dir" as the location the test was + # built. 
We will need this if the tests are installed in a location + # other than the kernel source + + source "$build_include" + build_dir="$mk_build_dir" +fi + +# This test requires kernel source and the *.gcda data therein +# Locate the top level of the kernel source, and the net/rds +# subfolder with the appropriate *.gcno object files +ksrc_dir="$(realpath "$build_dir"/../../../../../)" +kconfig="$ksrc_dir/.config" +obj_dir="$ksrc_dir/net/rds" + +GCOV_CMD=gcov + +#check to see if the host has the required packages to generate a gcov report +check_gcov_env() +{ + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find gcov. " + GENERATE_GCOV_REPORT=0 + return + fi + + # the gcov version must match the gcc version + GCC_VER=$(gcc -dumpfullversion) + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + #attempt to find a matching gcov version + GCOV_CMD=gcov-$(gcc -dumpversion) + + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + return + fi + + #recheck version number of found gcov executable + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ + awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + else + echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" + fi + fi +} + +# Check to see if the kconfig has the required configs to generate a coverage report +check_gcov_conf() +{ + if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if ! 
grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + + if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then + echo "To enable gcov reports, please run "\ + ""tools/testing/selftests/net/rds/config.sh -g" and rebuild the kernel" + else + # if we have the required kernel configs, proceed to check the environment to + # ensure we have the required gcov packages + check_gcov_env + fi +} + +# Kselftest framework requirement - SKIP code is 4. +check_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 enabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf_disabled() { + if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 disabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf() { + check_conf_enabled CONFIG_NET_SCH_NETEM + check_conf_enabled CONFIG_VETH + check_conf_enabled CONFIG_NET_NS + check_conf_enabled CONFIG_RDS_TCP + check_conf_enabled CONFIG_RDS + check_conf_disabled CONFIG_MODULES +} + +check_env() +{ + if ! test -d "$obj_dir"; then + echo "selftests: [SKIP] This test requires a kernel source tree" + exit 4 + fi + if ! test -e "$kconfig"; then + echo "selftests: [SKIP] This test requires a configured kernel source tree" + exit 4 + fi + if ! which strace > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without strace" + exit 4 + fi + if ! which tcpdump > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi + + if ! 
which python3 > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without python3" + exit 4 + fi + + python_major=$(python3 -c "import sys; print(sys.version_info[0])") + python_minor=$(python3 -c "import sys; print(sys.version_info[1])") + if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then + echo "selftests: [SKIP] Could not run test without at least python3.9" + python3 -V + exit 4 + fi +} + +LOG_DIR="$current_dir"/rds_logs +PLOSS=0 +PCORRUPT=0 +PDUP=0 +GENERATE_GCOV_REPORT=1 +while getopts "d:l:c:u:" opt; do + case ${opt} in + d) + LOG_DIR=${OPTARG} + ;; + l) + PLOSS=${OPTARG} + ;; + c) + PCORRUPT=${OPTARG} + ;; + u) + PDUP=${OPTARG} + ;; + :) + echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ + "[-u packet_duplcate] [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + + +check_env +check_conf +check_gcov_conf + + +rm -fr "$LOG_DIR" +TRACE_FILE="${LOG_DIR}/rds-strace.txt" +COVR_DIR="${LOG_DIR}/coverage/" +mkdir -p "$LOG_DIR" +mkdir -p "$COVR_DIR" + +set +e +echo running RDS tests... +echo Traces will be logged to "$TRACE_FILE" +rm -f "$TRACE_FILE" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ + -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" + +test_rc=$? +dmesg > "${LOG_DIR}/dmesg.out" + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo saving coverage data... + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ + while read -r f + do + cat < "/sys/kernel/debug/gcov/$f" > "/$f" + done) + + echo running gcovr... 
+ gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ + -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" +else + echo "Coverage report will be skipped" +fi + +if [ "$test_rc" -eq 0 ]; then + echo "PASS: Test completed successfully" +else + echo "FAIL: Test failed" +fi + +exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py new file mode 100644 index 000000000000..e6bb109bcead --- /dev/null +++ b/tools/testing/selftests/net/rds/test.py @@ -0,0 +1,262 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import ctypes +import errno +import hashlib +import os +import select +import signal +import socket +import subprocess +import sys +import atexit +from pwd import getpwuid +from os import stat +from lib.py import ip + + +libc = ctypes.cdll.LoadLibrary('libc.so.6') +setns = libc.setns + +net0 = 'net0' +net1 = 'net1' + +veth0 = 'veth0' +veth1 = 'veth1' + +# Helper function for creating a socket inside a network namespace. +# We need this because otherwise RDS will detect that the two TCP +# sockets are on the same interface and use the loop transport instead +# of the TCP transport. +def netns_socket(netns, *args): + u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + child = os.fork() + if child == 0: + # change network namespace + with open(f'/var/run/netns/{netns}') as f: + try: + ret = setns(f.fileno(), 0) + except IOError as e: + print(e.errno) + print(e) + + # create socket in target namespace + s = socket.socket(*args) + + # send resulting socket to parent + socket.send_fds(u0, [], [s.fileno()]) + + sys.exit(0) + + # receive socket from child + _, s, _, _ = socket.recv_fds(u1, 0, 1) + os.waitpid(child, 0) + u0.close() + u1.close() + return socket.fromfd(s[0], *args) + +def signal_handler(sig, frame): + print('Test timed out') + sys.exit(1) + +#Parse out command line arguments. 
We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default="/tmp") +parser.add_argument('--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +packet_loss=str(args.loss)+'%' +packet_corruption=str(args.corruption)+'%' +packet_duplicate=str(args.duplicate)+'%' + +ip(f"netns add {net0}") +ip(f"netns add {net1}") +ip(f"link add type veth") + +addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# move interfaces to separate namespaces so they can no longer be +# bound directly; this prevents rds from switching over from the tcp +# transport to the loop transport. 
+ip(f"link set {veth0} netns {net0} up") +ip(f"link set {veth1} netns {net1} up") + + + +# add addresses +ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") +ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") + +# add routes +ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") +ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") + +# sanity check that our two interfaces/addresses are correctly set up +# and communicating by doing a single ping +ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") + +# Start a packet capture on each network +for net in [net0, net1]: + tcpdump_pid = os.fork() + if tcpdump_pid == 0: + pcap = logdir+'/'+net+'.pcap' + subprocess.check_call(['touch', pcap]) + user = getpwuid(stat(pcap).st_uid).pw_name + ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") + sys.exit(0) + +# simulate packet loss, duplication and corruption +for net, iface in [(net0, veth0), (net1, veth1)]: + ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {packet_corruption} loss {packet_loss} duplicate \ + {packet_duplicate}") + +# add a timeout +if args.timeout > 0: + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) + +sockets = [ + netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), +] + +for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + +fileno_to_socket = { + s.fileno(): s for s in sockets +} + +addr_to_socket = { + addr: s for addr, s in zip(addrs, sockets) +} + +socket_to_addr = { + s: addr for addr, s in zip(addrs, sockets) +} + +send_hashes = {} +recv_hashes = {} + +ep = select.epoll() + +for s in sockets: + ep.register(s, select.EPOLLRDNORM) + +n = 50000 +nr_send = 0 +nr_recv = 0 + +while nr_send < n: + # Send as much as we can without blocking + print("sending...", nr_send, nr_recv) + while nr_send < n: + send_data = hashlib.sha256( + f'packet 
{nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + + # pseudo-random send/receive pattern + sender = sockets[nr_send % 2] + receiver = sockets[1 - (nr_send % 3) % 2] + + try: + sender.sendto(send_data, socket_to_addr[receiver]) + send_hashes.setdefault((sender.fileno(), receiver.fileno()), + hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) + nr_send = nr_send + 1 + except BlockingIOError as e: + break + except OSError as e: + if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]: + break + raise + + # Receive as much as we can without blocking + print("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + for fileno, eventmask in ep.poll(): + receiver = fileno_to_socket[fileno] + + if eventmask & select.EPOLLRDNORM: + while True: + try: + recv_data, address = receiver.recvfrom(1024) + sender = addr_to_socket[address] + recv_hashes.setdefault((sender.fileno(), + receiver.fileno()), hashlib.sha256()).update( + f'<{recv_data}>'.encode('utf-8')) + nr_recv = nr_recv + 1 + except BlockingIOError as e: + break + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + for net in [net0, net1]: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + +print("done", nr_send, nr_recv) + +# the Python socket module doesn't know these +RDS_INFO_FIRST = 10000 +RDS_INFO_LAST = 10017 + +nr_success = 0 +nr_error = 0 + +for s in sockets: + for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. 
+ try: + s.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + +print(f"getsockopt(): {nr_success}/{nr_error}") + +print("Stopping network packet captures") +subprocess.check_call(['killall', '-q', 'tcpdump']) + +# We're done sending and receiving stuff, now let's check if what +# we received is what we sent. +for (sender, receiver), send_hash in send_hashes.items(): + recv_hash = recv_hashes.get((sender, receiver)) + + if recv_hash is None: + print("FAIL: No data received") + sys.exit(1) + + if send_hash.hexdigest() != recv_hash.hexdigest(): + print("FAIL: Send/recv mismatch") + print("hash expected:", send_hash.hexdigest()) + print("hash received:", recv_hash.hexdigest()) + sys.exit(1) + + print(f"{sender}/{receiver}: ok") + +print("Success") +sys.exit(0)