- Linux-kselftest-mirror - lists.linaro.org

[PATCH] selftests: mlxsw: Fix typo in qos_mc_aware.sh

by Masanari Iida

This patch fixes some spelling typo in qos_mc_aware.sh Signed-off-by: Masanari Iida <standby24x7(a)gmail.com> --- tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 71231ad2dbfb..47315fe48d5a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -262,7 +262,7 @@ test_mc_aware() stop_traffic - log_test "UC performace under MC overload" + log_test "UC performance under MC overload" echo "UC-only throughput $(humanize $ucth1)" echo "UC+MC throughput $(humanize $ucth2)" @@ -316,7 +316,7 @@ test_uc_aware() stop_traffic - log_test "MC performace under UC overload" + log_test "MC performance under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" echo " egress UC throughput $(humanize ${uc_er})" echo " sent $attempts BC ARPs, got $passes responses" -- 2.22.0.545.g9c9b961d7eb1

6 years, 3 months

3
2
0 0

[PATCH 5/5] pidfd: add pidfd_wait tests

by Christian Brauner

Add tests for pidfd_wait() and CLONE_WAIT_PID: - test that pidfd_wait() syscall is supported - test that pidfd_wait() does not accept unknown flags - test that pidfd_wait() can wait on a pidfd - test that pidfd_wait() can wait on a pidfd and return struct rusage - test that pidfd_wait() can wait on a pidfd and return siginfo_t - test that pidfd_wait() works with WEXITED - test that pidfd_wait() works with WSTOPPED - test that pidfd_wait() works with WUNTRACED - test that pidfd_wait() works with WCONTINUED - test that pidfd_wait() works with WNOWAIT - test that pidfd_wait() works with WNOHANG - test that CLONE_WAIT_PID does not appear in waitid(P_ALL) requests - test that CLONE_WAIT_PID does not appear in waitid(P_PGID) requests - test that CLONE_WAIT_PID does not appear in wait4(-1) requests - test that CLONE_WAIT_PID does not appear in wait4(0) requests - test that CLONE_WAIT_PID does appear in waitid(P_PID) requests - test that CLONE_WAIT_PID does appear in wait4(pid) requests Signed-off-by: Christian Brauner <christian(a)brauner.io> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: "Eric W. Biederman" <ebiederm(a)xmission.com> Cc: Kees Cook <keescook(a)chromium.org> Cc: Joel Fernandes (Google) <joel(a)joelfernandes.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: David Howells <dhowells(a)redhat.com> Cc: Jann Horn <jannh(a)google.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Andy Lutomirsky <luto(a)kernel.org> Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Oleg Nesterov <oleg(a)redhat.com> Cc: Aleksa Sarai <cyphar(a)cyphar.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/pidfd/pidfd.h | 25 ++ tools/testing/selftests/pidfd/pidfd_test.c | 14 - tools/testing/selftests/pidfd/pidfd_wait.c | 398 +++++++++++++++++++++ 3 files changed, 423 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/pidfd/pidfd_wait.c diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 8452e910463f..d56bca179b9e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -16,6 +16,26 @@ #include "../kselftest.h" +#ifndef CLONE_PIDFD +#define CLONE_PIDFD 0x00001000 +#endif + +#ifndef CLONE_WAIT_PID +#define CLONE_WAIT_PID 0x200000000ULL +#endif + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +#ifndef __NR_pidfd_send_signal +#define __NR_pidfd_send_signal -1 +#endif + +#ifndef __NR_clone3 +#define __NR_clone3 -1 +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. @@ -53,5 +73,10 @@ int wait_for_pid(pid_t pid) return WEXITSTATUS(status); } +static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, + unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 7eaa8a3de262..42e3eb494d72 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -21,20 +21,12 @@ #include "pidfd.h" #include "../kselftest.h" -#ifndef __NR_pidfd_send_signal -#define __NR_pidfd_send_signal -1 -#endif - #define str(s) _str(s) #define _str(s) #s #define CHILD_THREAD_MIN_WAIT 3 /* seconds */ #define MAX_EVENTS 5 -#ifndef CLONE_PIDFD -#define CLONE_PIDFD 0x00001000 -#endif - static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) { size_t stack_size = 1024; @@ -47,12 +39,6 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) #endif } -static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, - unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - static int signal_received; static void set_signal_received_on_sigusr1(int sig) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c new file mode 100644 index 000000000000..fcbd10098067 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sched.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest.h" + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +static pid_t sys_clone3(struct clone_args *args) +{ + return syscall(__NR_clone3, args, sizeof(struct clone_args)); +} + +static pid_t sys_pidfd_wait(int pidfd, int *wstatus, siginfo_t *info, + struct rusage *ru, unsigned int states, + unsigned int flags) +{ + return syscall(__NR_pidfd_wait, pidfd, wstatus, info, ru, states, flags); +} + +static int test_pidfd_wait_syscall_support(void) +{ + int ret; + const char *test_name = "pidfd_wait check for support"; + + ret = sys_pidfd_wait(-EBADF, NULL, NULL, NULL, WEXITED, 0); + if (ret < 0 && errno == ENOSYS) + ksft_exit_skip("%s test: pidfd_wait() syscall not supported\n", + test_name); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +static int test_pidfd_wait_simple(void) +{ + const char *test_name = "pidfd_wait simple"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + pid_t pid; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_pidfd_wait(pidfd, NULL, NULL, NULL, -1, 0); + if (pid > 0) + ksft_exit_fail_msg( + "%s test: succeeded to wait on process with invalid flags passed", + test_name); + + pid = sys_pidfd_wait(pidfd, NULL, NULL, NULL, 0, -1); + if (pid > 0) + ksft_exit_fail_msg( + "%s test: succeeded to wait on process with invalid flags passed", + test_name); + + pid = sys_pidfd_wait(pidfd, NULL, NULL, NULL, 0, 0); + if (pid > 0) + ksft_exit_fail_msg( + "%s test: succeeded to wait on process with invalid flags passed", + test_name); + + pid = sys_pidfd_wait(pidfd, NULL, NULL, NULL, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + pidfd = -1; + parent_tid = -1; + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_pidfd_wait(pidfd, &status, NULL, NULL, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + if (!WIFEXITED(status) || WEXITSTATUS(status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + pidfd = -1; + parent_tid = -1; + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_FAILURE); + + pid = sys_pidfd_wait(pidfd, &status, NULL, NULL, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + if (!WIFEXITED(status) || !WEXITSTATUS(status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +static int test_pidfd_wait_rusage(void) +{ + const char *test_name = "pidfd_wait rusage"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + pid_t pid; + struct rusage rusage = { 0 }; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_pidfd_wait(pidfd, &status, NULL, &rusage, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + if (!WIFEXITED(status) || WEXITSTATUS(status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +static int test_pidfd_wait_siginfo(void) +{ + const char *test_name = "pidfd_wait siginfo"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_pidfd_wait(pidfd, &status, &info, NULL, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + if (!WIFEXITED(status) || WEXITSTATUS(status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_EXITED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +static int test_pidfd_wait_states(void) +{ + const char *test_name = "pidfd_wait states"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + pid = sys_pidfd_wait(pidfd, &status, &info, NULL, WSTOPPED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + pid = sys_pidfd_wait(pidfd, &status, &info, NULL, WCONTINUED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + pid = sys_pidfd_wait(pidfd, &status, &info, NULL, WUNTRACED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + pid = sys_pidfd_wait(pidfd, &status, &info, NULL, WEXITED, 0); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + close(pidfd); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +static int test_clone_wait_pid(void) +{ + const char *test_name = "CLONE_WAIT_PID"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PARENT_SETTID | CLONE_WAIT_PID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid, rpid; + + pid = sys_clone3(&args); + if (pid > 0) + ksft_exit_fail_msg( + "%s test: managed to create new process with CLONE_WAIT_PID but without CLONE_PIDFD", + test_name); + + args.flags |= CLONE_PIDFD; + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + ret = syscall(__NR_waitid, P_ALL, -1, NULL, WEXITED, NULL); + if (ret == 0) + ksft_exit_fail_msg( + "%s test: managed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through waitid(P_ALL) request", + test_name, parent_tid, pidfd); + + ret = syscall(__NR_waitid, P_PGID, getpgid(0), NULL, WEXITED, NULL); + if (ret == 0) + ksft_exit_fail_msg( + "%s test: managed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through waitid(P_PGID) request", + test_name, parent_tid, pidfd); + + rpid = syscall(__NR_wait4, -1, NULL, 0, NULL); + if (rpid > 0) + ksft_exit_fail_msg( + "%s test: managed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through wait4(-1) request", + test_name, parent_tid, pidfd); + + rpid = syscall(__NR_wait4, 0, NULL, 0, NULL); + if (rpid > 0) + ksft_exit_fail_msg( + "%s test: managed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through wait4(0) request", + test_name, parent_tid, pidfd); + + ret = syscall(__NR_waitid, P_PID, pid, NULL, WEXITED | WNOWAIT | WNOHANG, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through waitid(P_PID) request: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + rpid = sys_pidfd_wait(pidfd, NULL, NULL, NULL, WEXITED, WNOWAIT | WNOHANG); + if (rpid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + + rpid = syscall(__NR_wait4, pid, &status, 0, NULL); + if (rpid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process created with CLONE_PIDFD | CLONE_WAIT_PID with pid %d and pidfd %d through wait4(%d) request: %s", + test_name, parent_tid, pidfd, parent_tid, strerror(errno)); + + if (!WIFEXITED(status) || WEXITSTATUS(status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(6); + + test_pidfd_wait_syscall_support(); + test_pidfd_wait_simple(); + test_pidfd_wait_rusage(); + test_pidfd_wait_siginfo(); + test_pidfd_wait_states(); + test_clone_wait_pid(); + + return ksft_exit_pass(); +} -- 2.22.0

6 years, 3 months

1
0
0 0

[PATCH] selftests: kmod: Fix typo in kmod.sh

by Masanari Iida

This patch fixes some spelling typos in kmod.sh Signed-off-by: Masanari Iida <standby24x7(a)gmail.com> --- tools/testing/selftests/kmod/kmod.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 0a76314b4414..8b944cf042f6 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -28,7 +28,7 @@ # override by exporting to your environment prior running this script. # For instance this script assumes you do not have xfs loaded upon boot. # If this is false, export DEFAULT_KMOD_FS="ext4" prior to running this -# script if the filesyste module you don't have loaded upon bootup +# script if the filesystem module you don't have loaded upon bootup # is ext4 instead. Refer to allow_user_defaults() for a list of user # override variables possible. # @@ -263,7 +263,7 @@ config_get_test_result() config_reset() { if ! echo -n "1" >"$DIR"/reset; then - echo "$0: reset shuld have worked" >&2 + echo "$0: reset should have worked" >&2 exit 1 fi } @@ -488,7 +488,7 @@ usage() echo Example uses: echo echo "${TEST_NAME}.sh -- executes all tests" - echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 number of times is recomended" + echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 number of times is recommended" echo "${TEST_NAME}.sh -w 0008 -- Watch test ID 0008 run until an error occurs" echo "${TEST_NAME}.sh -s 0008 -- Run test ID 0008 once" echo "${TEST_NAME}.sh -c 0008 3 -- Run test ID 0008 three times" -- 2.22.0.545.g9c9b961d7eb1

6 years, 3 months

2
1
0 0

"make" vs "$(MAKE)" in tools/testing/selftests/Makefile

by Ilya Leoshkevich

Hello, Is there a reason why kselftest Makefile uses plain "make" instead of "$(MAKE)"? Because of this, "make kselftest TARGETS=bpf -j12" ends up building all bpf tests sequentially, since the top make's jobserver is not shared with its children. Replacing "make" with "$(MAKE)" helps, but since other Makefiles use "$(MAKE)", it looks as if this has been done intentionally. Best regards, Ilya

6 years, 3 months

3
2
0 0

[PATCH] selftests/livepatch: add test skip handling

by Joe Lawrence

Before running a livpeatch self-test, first verify that we've built and installed the livepatch self-test kernel modules by running a 'modprobe --dry-run'. This should catch a few environment issues, including !CONFIG_LIVEPATCH and !CONFIG_TEST_LIVEPATCH. In these cases, exit gracefully with test-skip status rather than test-fail status. Reported-by: Jiri Benc <jbenc(a)redhat.com> Suggested-by: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Joe Lawrence <joe.lawrence(a)redhat.com> --- tools/testing/selftests/livepatch/functions.sh | 18 ++++++++++++++++++ .../selftests/livepatch/test-callbacks.sh | 5 +++++ .../selftests/livepatch/test-livepatch.sh | 3 +++ .../selftests/livepatch/test-shadow-vars.sh | 2 ++ 4 files changed, 28 insertions(+) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 30195449c63c..92d6cfb49365 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -13,6 +13,14 @@ function log() { echo "$1" > /dev/kmsg } +# skip(msg) - testing can't proceed +# msg - explanation +function skip() { + log "SKIP: $1" + echo "SKIP: $1" >&2 + exit 4 +} + # die(msg) - game over, man # msg - dying words function die() { @@ -43,6 +51,16 @@ function loop_until() { done } +function assert_mod() { + local mod="$1" + + if ! modprobe --dry-run "$mod" &>/dev/null ; then + skip "Failed modprobe --dry-run of module: $mod" + fi + + return 1 +} + function is_livepatch_mod() { local mod="$1" diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index e97a9dcb73c7..87a407cee7fd 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -9,6 +9,11 @@ MOD_LIVEPATCH2=test_klp_callbacks_demo2 MOD_TARGET=test_klp_callbacks_mod MOD_TARGET_BUSY=test_klp_callbacks_busy +assert_mod $MOD_LIVEPATCH +assert_mod $MOD_LIVEPATCH2 +assert_mod $MOD_TARGET +assert_mod $MOD_TARGET_BUSY + set_dynamic_debug diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index f05268aea859..8d3b75ceeeff 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -7,6 +7,9 @@ MOD_LIVEPATCH=test_klp_livepatch MOD_REPLACE=test_klp_atomic_replace +assert_mod $MOD_LIVEPATCH +assert_mod $MOD_REPLACE + set_dynamic_debug diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh index 04a37831e204..1ab09bc50363 100755 --- a/tools/testing/selftests/livepatch/test-shadow-vars.sh +++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh @@ -6,6 +6,8 @@ MOD_TEST=test_klp_shadow_vars +assert_mod $MOD_TEST + set_dynamic_debug -- 2.21.0

6 years, 3 months

5
8
0 0

[PATCH v10 0/9] namei: openat2(2) path resolution restrictions

by Aleksa Sarai

This patch is being developed here (with snapshots of each series version being stashed in separate branches with names of the form "resolveat/vX-summary"): <https://github.com/cyphar/linux/tree/resolveat/master> Patch changelog: v10: * Ensure that unlazy_walk() will fail if we are in a scoped walk and the caller has zeroed nd->root (this happens in a few places, I'm not sure why because unlazy_walk() does legitimize_path() already). In this case we need to go through path_init() again to reset it (otherwise we will have a breakout because set_root() will breakout). * Also add a WARN_ON (and return -ENOTRECOVERABLE) if LOOKUP_IN_ROOT is set and we are in set_root() -- which should never happen and will cause a breakout. * Make changes suggested by Al Viro: * Remove nd->{opath_mask,acc_mode} by moving all of the magic-link permission logic be done after trailing_symlink() (with trailing_magiclink()) only within path_openat(). * Introduce LOOKUP_MAGICLINK_JUMPED to be able to detect magic-link jumps done with nd_jump_link() (so we don't end up blocking other LOOKUP_JUMPED cases). * Simplify all of the path_init() changes to make the code far less confusing. dirfd_path_init() turns out to be un-necessary. * Make openat2(2) also -EINVAL on unknown how->flags. [Dmitry V. Levin] * Clean up bad definitions of O_EMPTYPATH on architectures where O_* flags are subtly different to <asm-generic/fcntl.h>. * Switch away from passing a struct to build_open_flags() and instead just copy the one field we need to temporarily modify (how->flags). Also fix a bug in OPENHOW_MODE. [Rasmus Villemoes] * Fix syscall linkages and switch to 437. [Arnd Bergmann] * Clean up text in commit messages and the cover-letter. [Rolf Eike Beer] * Fix openat2 selftest makefile. [Michael Ellerman] The need for some sort of control over VFS's path resolution (to avoid malicious paths resulting in inadvertent breakouts) has been a very long-standing desire of many userspace applications. This patchset is a revival of Al Viro's old AT_NO_JUMPS[1,2] patchset (which was a variant of David Drysdale's O_BENEATH patchset[3] which was a spin-off of the Capsicum project[4]) with a few additions and changes made based on the previous discussion within [5] as well as others I felt were useful. In line with the conclusions of the original discussion of AT_NO_JUMPS, the flag has been split up into separate flags. However, instead of being an openat(2) flag it is provided through a new syscall openat2(2) which provides an alternative way to get an O_PATH file descriptor (the reasoning for doing this is included in the patch description). The following new LOOKUP_* flags are added: * LOOKUP_NO_XDEV blocks all mountpoint crossings (upwards, downwards, or through absolute links). Absolute pathnames alone in openat(2) do not trigger this. * LOOKUP_NO_MAGICLINKS blocks resolution through /proc/$pid/fd-style links. This is done by blocking the usage of nd_jump_link() during resolution in a filesystem. The term "magic-links" is used to match with the only reference to these links in Documentation/, but I'm happy to change the name. It should be noted that this is different to the scope of ~LOOKUP_FOLLOW in that it applies to all path components. However, you can do openat2(NO_FOLLOW|NO_MAGICLINKS) on a magic-link and it will *not* fail (assuming that no parent component was a magic-link), and you will have an fd for the magic-link. * LOOKUP_BENEATH disallows escapes to outside the starting dirfd's tree, using techniques such as ".." or absolute links. Absolute paths in openat(2) are also disallowed. Conceptually this flag is to ensure you "stay below" a certain point in the filesystem tree -- but this requires some additional to protect against various races that would allow escape using "..". Currently LOOKUP_BENEATH implies LOOKUP_NO_MAGICLINKS, because it can trivially beam you around the filesystem (breaking the protection). In future, there might be similar safety checks done as in LOOKUP_IN_ROOT, but that requires more discussion. In addition, two new flags are added that expand on the above ideas: * LOOKUP_NO_SYMLINKS does what it says on the tin. No symlink resolution is allowed at all, including magic-links. Just as with LOOKUP_NO_MAGICLINKS this can still be used with NOFOLLOW to open an fd for the symlink as long as no parent path had a symlink component. * LOOKUP_IN_ROOT is an extension of LOOKUP_BENEATH that, rather than blocking attempts to move past the root, forces all such movements to be scoped to the starting point. This provides chroot(2)-like protection but without the cost of a chroot(2) for each filesystem operation, as well as being safe against race attacks that chroot(2) is not. If a race is detected (as with LOOKUP_BENEATH) then an error is generated, and similar to LOOKUP_BENEATH it is not permitted to cross magic-links with LOOKUP_IN_ROOT. The primary need for this is from container runtimes, which currently need to do symlink scoping in userspace[6] when opening paths in a potentially malicious container. There is a long list of CVEs that could have bene mitigated by having RESOLVE_THIS_ROOT (such as CVE-2017-1002101, CVE-2017-1002102, CVE-2018-15664, and CVE-2019-5736, just to name a few). And further, several semantics of file descriptor "re-opening" are now changed to prevent attacks like CVE-2019-5736 by restricting how magic-links can be resolved (based on their mode). This required some other changes to the semantics of the modes of O_PATH file descriptor's associated /proc/self/fd magic-links. openat2(2) has the ability to further restrict re-opening of its own O_PATH fds, so that users can make even better use of this feature. Finally, O_EMPTYPATH was added so that users can do /proc/self/fd-style re-opening without depending on procfs. The new restricted semantics for magic-links are applied here too. In order to make all of the above more usable, I'm working on libpathrs[7] which is a C-friendly library for safe path resolution. It features a userspace-emulated backend if the kernel doesn't support openat2(2). Hopefully we can get userspace to switch to using it, and thus get openat2(2) support for free once it's ready. Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Eric Biederman <ebiederm(a)xmission.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: David Howells <dhowells(a)redhat.com> Cc: Jann Horn <jannh(a)google.com> Cc: Christian Brauner <christian(a)brauner.io> Cc: David Drysdale <drysdale(a)google.com> Cc: Tycho Andersen <tycho(a)tycho.ws> Cc: Kees Cook <keescook(a)chromium.org> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: <containers(a)lists.linux-foundation.org> Cc: <linux-fsdevel(a)vger.kernel.org> Cc: <linux-api(a)vger.kernel.org> [1]: https://lwn.net/Articles/721443/ [2]: https://lore.kernel.org/patchwork/patch/784221/ [3]: https://lwn.net/Articles/619151/ [4]: https://lwn.net/Articles/603929/ [5]: https://lwn.net/Articles/723057/ [6]: https://github.com/cyphar/filepath-securejoin [7]: https://github.com/openSUSE/libpathrs Aleksa Sarai (9): namei: obey trailing magic-link DAC permissions procfs: switch magic-link modes to be more sane open: O_EMPTYPATH: procfs-less file descriptor re-opening namei: O_BENEATH-style path resolution flags namei: LOOKUP_IN_ROOT: chroot-like path resolution namei: aggressively check for nd->root escape on ".." resolution open: openat2(2) syscall kselftest: save-and-restore errno to allow for %m formatting selftests: add openat2(2) selftests Documentation/filesystems/path-lookup.rst | 12 +- arch/alpha/include/uapi/asm/fcntl.h | 1 + arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/include/uapi/asm/fcntl.h | 39 +- arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/include/uapi/asm/fcntl.h | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/fcntl.c | 2 +- fs/internal.h | 1 + fs/namei.c | 270 ++++++++++-- fs/open.c | 112 ++++- fs/proc/base.c | 20 +- fs/proc/fd.c | 23 +- fs/proc/namespaces.c | 2 +- include/linux/fcntl.h | 17 +- include/linux/fs.h | 8 +- include/linux/namei.h | 9 + include/linux/syscalls.h | 17 +- include/uapi/asm-generic/fcntl.h | 4 + include/uapi/asm-generic/unistd.h | 4 +- include/uapi/linux/fcntl.h | 42 ++ tools/testing/selftests/Makefile | 1 + tools/testing/selftests/kselftest.h | 15 + tools/testing/selftests/memfd/memfd_test.c | 7 +- tools/testing/selftests/openat2/.gitignore | 1 + tools/testing/selftests/openat2/Makefile | 8 + tools/testing/selftests/openat2/helpers.c | 162 +++++++ tools/testing/selftests/openat2/helpers.h | 114 +++++ .../testing/selftests/openat2/linkmode_test.c | 326 ++++++++++++++ .../selftests/openat2/rename_attack_test.c | 124 ++++++ .../testing/selftests/openat2/resolve_test.c | 397 ++++++++++++++++++ 46 files changed, 1652 insertions(+), 107 deletions(-) create mode 100644 tools/testing/selftests/openat2/.gitignore create mode 100644 tools/testing/selftests/openat2/Makefile create mode 100644 tools/testing/selftests/openat2/helpers.c create mode 100644 tools/testing/selftests/openat2/helpers.h create mode 100644 tools/testing/selftests/openat2/linkmode_test.c create mode 100644 tools/testing/selftests/openat2/rename_attack_test.c create mode 100644 tools/testing/selftests/openat2/resolve_test.c -- 2.22.0

6 years, 3 months

2
12
0 0

[PATCH v9 00/10] namei: openat2(2) path resolution restrictions

by Aleksa Sarai

Patch changelog: v9: * Replace resolveat(2) with openat2(2). [Linus] * Output a warning to dmesg if may_open_magiclink() is violated. * Add an openat2(O_CREAT) testcase. v8: * Default to O_CLOEXEC to match other new fd-creation syscalls (users can always disable O_CLOEXEC afterwards). [Christian] * Implement magic-link restrictions based on their mode. This is done through a series of masks and is designed to avoid breaking users -- most users don't have chained O_PATH fd re-opens. * Add O_EMPTYPATH which allows for fd re-opening without needing procfs. This would help some users of fd re-opening, and with the changes to magic-link permissions we now have the right semantics for such a flag. * Add selftests for resolveat(2), O_EMPTYPATH, and the magic-link mode semantics. v7: * Remove execveat(2) support for these flags since it might result in some pretty hairy security issues with setuid binaries. There are other avenues we can go down to solve the issues with CVE-2019-5736. [Jann] * Reserve an additional bit in resolveat(2) for the eXecute access mode if we end up implementing it. v6: * Drop O_* flags API to the new LOOKUP_ path scoping bits and instead introduce resolveat(2) as an alternative method of obtaining an O_PATH. The justification for this is included in patch 6 (though switching back to O_* flags is trivial). v5: * In response to CVE-2019-5736 (one of the vectors showed that open(2)+fexec(3) cannot be used to scope binfmt_script's implicit open_exec()), AT_* flags have been re-added and are now piped through to binfmt_script (and other binfmt_* that use open_exec) but are only supported for execveat(2) for now. v4: * Remove AT_* flag reservations, as they require more discussion. * Switch to path_is_under() over __d_path() for breakout checking. * Make O_XDEV no longer block openat("/tmp", "/", O_XDEV) -- dirfd is now ignored for absolute paths to match other flags. * Improve the dirfd_path_init() refactor and move it to a separate commit. * Remove reference to Linux-capsicum. * Switch "proclink" name to magic-link. v3: [resend] v2: * Made ".." resolution with AT_THIS_ROOT and AT_BENEATH safe(r) with some semi-aggressive __d_path checking (see patch 3). * Disallowed "proclinks" with AT_THIS_ROOT and AT_BENEATH, in the hopes they can be re-enabled once safe. * Removed the selftests as they will be reimplemented as xfstests. * Removed stat(2) support, since you can already get it through O_PATH and fstatat(2). The need for some sort of control over VFS's path resolution (to avoid malicious paths resulting in inadvertent breakouts) has been a very long-standing desire of many userspace applications. This patchset is a revival of Al Viro's old AT_NO_JUMPS[1,2] patchset (which was a variant of David Drysdale's O_BENEATH patchset[3] which was a spin-off of the Capsicum project[4]) with a few additions and changes made based on the previous discussion within [5] as well as others I felt were useful. In line with the conclusions of the original discussion of AT_NO_JUMPS, the flag has been split up into separate flags. However, instead of being an openat(2) flag it is provided through a new syscall openat2(2) which provides an alternative way to get an O_PATH file descriptor (the reasoning for doing this is included in patch 6). The following new LOOKUP_ flags are added: * LOOKUP_XDEV blocks all mountpoint crossings (upwards, downwards, or through absolute links). Absolute pathnames alone in openat(2) do not trigger this. * LOOKUP_NO_MAGICLINKS blocks resolution through /proc/$pid/fd-style links. This is done by blocking the usage of nd_jump_link() during resolution in a filesystem. The term "magic-links" is used to match with the only reference to these links in Documentation/, but I'm happy to change the name. It should be noted that this is different to the scope of ~LOOKUP_FOLLOW in that it applies to all path components. However, you can do openat2(NO_FOLLOW|NO_MAGICLINKS) on a magic-link and it will *not* fail (assuming that no parent component was a magic-link), and you will have an fd for the magic-link. * LOOKUP_BENEATH disallows escapes to outside the starting dirfd's tree, using techniques such as ".." or absolute links. Absolute paths in openat(2) are also disallowed. Conceptually this flag is to ensure you "stay below" a certain point in the filesystem tree -- but this requires some additional to protect against various races that would allow escape using "..". Currently LOOKUP_BENEATH implies LOOKUP_NO_MAGICLINKS, because it can trivially beam you around the filesystem (breaking the protection). In future, there might be similar safety checks done as in LOOKUP_IN_ROOT, but that requires more discussion. In addition, two new flags are added that expand on the above ideas: * LOOKUP_NO_SYMLINKS does what it says on the tin. No symlink resolution is allowed at all, including magic-links. Just as with LOOKUP_NO_MAGICLINKS this can still be used with NOFOLLOW to open an fd for the symlink as long as no parent path had a symlink component. * LOOKUP_IN_ROOT is an extension of LOOKUP_BENEATH that, rather than blocking attempts to move past the root, forces all such movements to be scoped to the starting point. This provides chroot(2)-like protection but without the cost of a chroot(2) for each filesystem operation, as well as being safe against race attacks that chroot(2) is not. If a race is detected (as with LOOKUP_BENEATH) then an error is generated, and similar to LOOKUP_BENEATH it is not permitted to cross magic-links with LOOKUP_IN_ROOT. The primary need for this is from container runtimes, which currently need to do symlink scoping in userspace[6] when opening paths in a potentially malicious container. There is a long list of CVEs that could have bene mitigated by having O_THISROOT (such as CVE-2017-1002101, CVE-2017-1002102, CVE-2018-15664, and CVE-2019-5736, just to name a few). And further, several semantics of file descriptor "re-opening" are now changed to prevent attacks like CVE-2019-5736 by restricting how magic-links can be resolved (based on their mode). This required some other changes to the semantics of the modes of O_PATH file descriptor's associated /proc/self/fd magic-links. openat2(2) has the ability to further restrict re-opening of its own O_PATH fds, so that users can make even better use of this feature. Finally, O_EMPTYPATH was added so that users can do /proc/self/fd-style re-opening without depending on procfs. The new restricted semantics for magic-links are applied here too. In order to make all of the above more usable, I'm working on libpathrs[7] which is a C-friendly library for safe path resolution. It features a userspace-emulated backend if the kernel doesn't support openat2(2). Hopefully we can get userspace to switch to using it, and thus get openat2(2) support for free once it's ready. Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Eric Biederman <ebiederm(a)xmission.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: David Howells <dhowells(a)redhat.com> Cc: Jann Horn <jannh(a)google.com> Cc: Christian Brauner <christian(a)brauner.io> Cc: David Drysdale <drysdale(a)google.com> Cc: Tycho Andersen <tycho(a)tycho.ws> Cc: Kees Cook <keescook(a)chromium.org> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: <containers(a)lists.linux-foundation.org> Cc: <linux-fsdevel(a)vger.kernel.org> Cc: <linux-api(a)vger.kernel.org> [1]: https://lwn.net/Articles/721443/ [2]: https://lore.kernel.org/patchwork/patch/784221/ [3]: https://lwn.net/Articles/619151/ [4]: https://lwn.net/Articles/603929/ [5]: https://lwn.net/Articles/723057/ [6]: https://github.com/cyphar/filepath-securejoin [7]: https://github.com/openSUSE/libpathrs Aleksa Sarai (10): namei: obey trailing magic-link DAC permissions procfs: switch magic-link modes to be more sane open: O_EMPTYPATH: procfs-less file descriptor re-opening namei: split out nd->dfd handling to dirfd_path_init namei: O_BENEATH-style path resolution flags namei: LOOKUP_IN_ROOT: chroot-like path resolution namei: aggressively check for nd->root escape on ".." resolution open: openat2(2) syscall kselftest: save-and-restore errno to allow for %m formatting selftests: add openat2(2) selftests arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/fcntl.c | 2 +- fs/internal.h | 1 + fs/namei.c | 333 ++++++++++++--- fs/open.c | 140 +++++-- fs/proc/base.c | 20 +- fs/proc/fd.c | 23 +- fs/proc/namespaces.c | 2 +- include/linux/fcntl.h | 17 +- include/linux/fs.h | 8 +- include/linux/namei.h | 8 + include/linux/syscalls.h | 14 +- include/uapi/asm-generic/fcntl.h | 5 + include/uapi/asm-generic/unistd.h | 5 +- include/uapi/linux/fcntl.h | 38 ++ tools/testing/selftests/Makefile | 1 + tools/testing/selftests/kselftest.h | 15 + tools/testing/selftests/memfd/memfd_test.c | 7 +- tools/testing/selftests/openat2/.gitignore | 1 + tools/testing/selftests/openat2/Makefile | 12 + tools/testing/selftests/openat2/helpers.c | 162 +++++++ tools/testing/selftests/openat2/helpers.h | 114 +++++ .../testing/selftests/openat2/linkmode_test.c | 325 ++++++++++++++ .../selftests/openat2/rename_attack_test.c | 124 ++++++ .../testing/selftests/openat2/resolve_test.c | 395 ++++++++++++++++++ 42 files changed, 1667 insertions(+), 125 deletions(-) create mode 100644 tools/testing/selftests/openat2/.gitignore create mode 100644 tools/testing/selftests/openat2/Makefile create mode 100644 tools/testing/selftests/openat2/helpers.c create mode 100644 tools/testing/selftests/openat2/helpers.h create mode 100644 tools/testing/selftests/openat2/linkmode_test.c create mode 100644 tools/testing/selftests/openat2/rename_attack_test.c create mode 100644 tools/testing/selftests/openat2/resolve_test.c -- 2.22.0

6 years, 3 months

7
44
0 0

[PATCH AUTOSEL 5.2 226/249] selftests: bpf: fix inlines in test_lwt_seg6local

by Sasha Levin

From: Jiri Benc <jbenc(a)redhat.com> [ Upstream commit 11aca65ec4db09527d3e9b6b41a0615b7da4386b ] Selftests are reporting this failure in test_lwt_seg6local.sh: + ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 Error fetching program/map! Failed to parse eBPF program: Operation not permitted The problem is __attribute__((always_inline)) alone is not enough to prevent clang from inserting those functions in .text. In that case, .text is not marked as relocateable. See the output of objdump -h test_lwt_seg6local.o: Idx Name Size VMA LMA File off Algn 0 .text 00003530 0000000000000000 0000000000000000 00000040 2**3 CONTENTS, ALLOC, LOAD, READONLY, CODE This causes the iproute bpf loader to fail in bpf_fetch_prog_sec: bpf_has_call_data returns true but bpf_fetch_prog_relo fails as there's no relocateable .text section in the file. To fix this, convert to 'static __always_inline'. v2: Use 'static __always_inline' instead of 'static inline __attribute__((always_inline))' Fixes: c99a84eac026 ("selftests/bpf: test for seg6local End.BPF action") Signed-off-by: Jiri Benc <jbenc(a)redhat.com> Acked-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- .../testing/selftests/bpf/progs/test_lwt_seg6local.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 0575751bc1bc..e2f6ed0a583d 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -61,7 +61,7 @@ struct sr6_tlv_t { unsigned char value[0]; } BPF_PACKET_HEADER; -__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb) { void *cursor, *data_end; struct ip6_srh_t *srh; @@ -95,7 +95,7 @@ __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) return srh; } -__attribute__((always_inline)) +static __always_inline int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, uint32_t old_pad, uint32_t pad_off) { @@ -125,7 +125,7 @@ int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, return 0; } -__attribute__((always_inline)) +static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t *tlv_off, uint32_t *pad_size, uint32_t *pad_off) @@ -184,7 +184,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, return 0; } -__attribute__((always_inline)) +static __always_inline int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, struct sr6_tlv_t *itlv, uint8_t tlv_size) { @@ -228,7 +228,7 @@ int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, return update_tlv_pad(skb, new_pad, pad_size, pad_off); } -__attribute__((always_inline)) +static __always_inline int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off) { @@ -266,7 +266,7 @@ int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, return update_tlv_pad(skb, new_pad, pad_size, pad_off); } -__attribute__((always_inline)) +static __always_inline int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) { int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + -- 2.20.1

6 years, 3 months

3
6
0 0

[PATCH AUTOSEL 5.2 124/171] rseq/selftests: Fix Thumb mode build failure on arm32

by Sasha Levin

From: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> [ Upstream commit ee8a84c60bcc1f1615bd9cb3edfe501e26cdc85b ] Using ".arm .inst" for the arm signature introduces build issues for programs compiled in Thumb mode because the assembler stays in the arm mode for the rest of the inline assembly. Revert to using a ".word" to express the signature as data instead. The choice of signature is a valid trap instruction on arm32 little endian, where both code and data are little endian. ARMv6+ big endian (BE8) generates mixed endianness code vs data: little-endian code and big-endian data. The data value of the signature needs to have its byte order reversed to generate the trap instruction. Prior to ARMv6, -mbig-endian generates big-endian code and data (which match), so the endianness of the data representation of the signature should not be reversed. However, the choice between BE32 and BE8 is done by the linker, so we cannot know whether code and data endianness will be mixed before the linker is invoked. So rather than try to play tricks with the linker, the rseq signature is simply data (not a trap instruction) prior to ARMv6 on big endian. This is why the signature is expressed as data (.word) rather than as instruction (.inst) in assembler. Because a ".word" is used to emit the signature, it will be interpreted as a literal pool by a disassembler, not as an actual instruction. Considering that the signature is not meant to be executed except in scenarios where the program execution is completely bogus, this should not be an issue. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Acked-by: Will Deacon <will.deacon(a)arm.com> CC: Peter Zijlstra <peterz(a)infradead.org> CC: Thomas Gleixner <tglx(a)linutronix.de> CC: Joel Fernandes <joelaf(a)google.com> CC: Catalin Marinas <catalin.marinas(a)arm.com> CC: Dave Watson <davejwatson(a)fb.com> CC: Will Deacon <will.deacon(a)arm.com> CC: Shuah Khan <shuah(a)kernel.org> CC: Andi Kleen <andi(a)firstfloor.org> CC: linux-kselftest(a)vger.kernel.org CC: "H . Peter Anvin" <hpa(a)zytor.com> CC: Chris Lameter <cl(a)linux.com> CC: Russell King <linux(a)arm.linux.org.uk> CC: Michael Kerrisk <mtk.manpages(a)gmail.com> CC: "Paul E . McKenney" <paulmck(a)linux.vnet.ibm.com> CC: Paul Turner <pjt(a)google.com> CC: Boqun Feng <boqun.feng(a)gmail.com> CC: Josh Triplett <josh(a)joshtriplett.org> CC: Steven Rostedt <rostedt(a)goodmis.org> CC: Ben Maurer <bmaurer(a)fb.com> CC: linux-api(a)vger.kernel.org CC: Andy Lutomirski <luto(a)amacapital.net> CC: Andrew Morton <akpm(a)linux-foundation.org> CC: Linus Torvalds <torvalds(a)linux-foundation.org> CC: Carlos O'Donell <carlos(a)redhat.com> CC: Florian Weimer <fweimer(a)redhat.com> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/rseq/rseq-arm.h | 61 +++++++++++++------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 84f28f147fb6..5943c816c07c 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -6,6 +6,8 @@ */ /* + * - ARM little endian + * * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand * value 0x5de3. This traps if user-space reaches this instruction by mistake, * and the uncommon operand ensures the kernel does not move the instruction @@ -22,36 +24,40 @@ * def3 udf #243 ; 0xf3 * e7f5 b.n <7f5> * - * pre-ARMv6 big endian code: - * e7f5 b.n <7f5> - * def3 udf #243 ; 0xf3 + * - ARMv6+ big endian (BE8): * * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian - * code and big-endian data. Ensure the RSEQ_SIG data signature matches code - * endianness. Prior to ARMv6, -mbig-endian generates big-endian code and data - * (which match), so there is no need to reverse the endianness of the data - * representation of the signature. However, the choice between BE32 and BE8 - * is done by the linker, so we cannot know whether code and data endianness - * will be mixed before the linker is invoked. + * code and big-endian data. The data value of the signature needs to have its + * byte order reversed to generate the trap instruction: + * + * Data: 0xf3def5e7 + * + * Translates to this A32 instruction pattern: + * + * e7f5def3 udf #24035 ; 0x5de3 + * + * Translates to this T16 instruction pattern: + * + * def3 udf #243 ; 0xf3 + * e7f5 b.n <7f5> + * + * - Prior to ARMv6 big endian (BE32): + * + * Prior to ARMv6, -mbig-endian generates big-endian code and data + * (which match), so the endianness of the data representation of the + * signature should not be reversed. However, the choice between BE32 + * and BE8 is done by the linker, so we cannot know whether code and + * data endianness will be mixed before the linker is invoked. So rather + * than try to play tricks with the linker, the rseq signature is simply + * data (not a trap instruction) prior to ARMv6 on big endian. This is + * why the signature is expressed as data (.word) rather than as + * instruction (.inst) in assembler. */ -#define RSEQ_SIG_CODE 0xe7f5def3 - -#ifndef __ASSEMBLER__ - -#define RSEQ_SIG_DATA \ - ({ \ - int sig; \ - asm volatile ("b 2f\n\t" \ - "1: .inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \ - "2:\n\t" \ - "ldr %[sig], 1b\n\t" \ - : [sig] "=r" (sig)); \ - sig; \ - }) - -#define RSEQ_SIG RSEQ_SIG_DATA - +#ifdef __ARMEB__ +#define RSEQ_SIG 0xf3def5e7 /* udf #24035 ; 0x5de3 (ARMv6+) */ +#else +#define RSEQ_SIG 0xe7f5def3 /* udf #24035 ; 0x5de3 */ #endif #define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") @@ -125,8 +131,7 @@ do { \ __rseq_str(table_label) ":\n\t" \ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ - ".arm\n\t" \ - ".inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \ + ".word " __rseq_str(RSEQ_SIG) "\n\t" \ __rseq_str(label) ":\n\t" \ teardown \ "b %l[" __rseq_str(abort_label) "]\n\t" -- 2.20.1

6 years, 3 months

1
0
0 0

[PATCH v10 6/9] kselftests: cgroup: add freezer controller self-tests

by Roman Gushchin

This patch implements 9 tests for the freezer controller for cgroup v2: 1) a simple test, which aims to freeze and unfreeze a cgroup with 100 processes 2) a more complicated tree test, which creates a hierarchy of cgroups, puts some processes in some cgroups, and tries to freeze and unfreeze different parts of the subtree 3) a forkbomb test: the test aims to freeze a forkbomb running in a cgroup, kill all tasks in the cgroup and remove the cgroup without the unfreezing. 4) rmdir test: the test creates two nested cgroups, freezes the parent one, checks that the child can be successfully removed, and a new child can be created 5) migration tests: the test checks migration of a task between frozen cgroups: from a frozen to a running, from a running to a frozen, and from a frozen to a frozen. 6) ptrace test: the test checks that it's possible to attach to a process in a frozen cgroup, get some information and detach, and the cgroup will remain frozen. 7) stopped test: the test checks that it's possible to freeze a cgroup with a stopped task 8) ptraced test: the test checks that it's possible to freeze a cgroup with a ptraced task 9) vfork test: the test checks that it's possible to freeze a cgroup with a parent process waiting for the child process in vfork() Expected output: $ ./test_freezer ok 1 test_cgfreezer_simple ok 2 test_cgfreezer_tree ok 3 test_cgfreezer_forkbomb ok 4 test_cgrreezer_rmdir ok 5 test_cgfreezer_migrate ok 6 test_cgfreezer_ptrace ok 7 test_cgfreezer_stopped ok 8 test_cgfreezer_ptraced ok 9 test_cgfreezer_vfork Signed-off-by: Roman Gushchin <guro(a)fb.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: kernel-team(a)fb.com Cc: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/cgroup/.gitignore | 1 + tools/testing/selftests/cgroup/Makefile | 2 + tools/testing/selftests/cgroup/cgroup_util.c | 54 +- tools/testing/selftests/cgroup/cgroup_util.h | 5 + tools/testing/selftests/cgroup/test_freezer.c | 851 ++++++++++++++++++ 5 files changed, 912 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/cgroup/test_freezer.c diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index adacda50a4b2..7f9835624793 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1,2 +1,3 @@ test_memcontrol test_core +test_freezer diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 23fbaa4a9630..8d369b6a2069 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -5,8 +5,10 @@ all: TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_core +TEST_GEN_PROGS += test_freezer include ../lib.mk $(OUTPUT)/test_memcontrol: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c +$(OUTPUT)/test_freezer: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index eba06f94433b..4c223266299a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index) return ret; } +char *cg_control(const char *cgroup, const char *control) +{ + size_t len = strlen(cgroup) + strlen(control) + 2; + char *ret = malloc(len); + + snprintf(ret, len, "%s/%s", cgroup, control); + + return ret; +} + int cg_read(const char *cgroup, const char *control, char *buf, size_t len) { char path[PATH_MAX]; @@ -196,7 +206,32 @@ int cg_create(const char *cgroup) return mkdir(cgroup, 0644); } -static int cg_killall(const char *cgroup) +int cg_wait_for_proc_count(const char *cgroup, int count) +{ + char buf[10 * PAGE_SIZE] = {0}; + int attempts; + char *ptr; + + for (attempts = 10; attempts >= 0; attempts--) { + int nr = 0; + + if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) + break; + + for (ptr = buf; *ptr; ptr++) + if (*ptr == '\n') + nr++; + + if (nr >= count) + return 0; + + usleep(100000); + } + + return -1; +} + +int cg_killall(const char *cgroup) { char buf[PAGE_SIZE]; char *ptr = buf; @@ -238,6 +273,14 @@ int cg_destroy(const char *cgroup) return ret; } +int cg_enter(const char *cgroup, int pid) +{ + char pidbuf[64]; + + snprintf(pidbuf, sizeof(pidbuf), "%d", pid); + return cg_write(cgroup, "cgroup.procs", pidbuf); +} + int cg_enter_current(const char *cgroup) { char pidbuf[64]; @@ -367,3 +410,12 @@ int set_oom_adj_score(int pid, int score) close(fd); return 0; } + +char proc_read_text(int pid, const char *item, char *buf, size_t size) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); + + return read_text(path, buf, size); +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 9ac8b7958f83..c72f28046bfa 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err) extern int cg_find_unified_root(char *root, size_t len); extern char *cg_name(const char *root, const char *name); extern char *cg_name_indexed(const char *root, const char *name, int index); +extern char *cg_control(const char *cgroup, const char *control); extern int cg_create(const char *cgroup); extern int cg_destroy(const char *cgroup); extern int cg_read(const char *cgroup, const char *control, @@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); +extern int cg_enter(const char *cgroup, int pid); extern int cg_enter_current(const char *cgroup); extern int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), @@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size); extern int alloc_anon(const char *cgroup, void *arg); extern int is_swap_enabled(void); extern int set_oom_adj_score(int pid, int score); +extern int cg_wait_for_proc_count(const char *cgroup, int count); +extern int cg_killall(const char *cgroup); +extern char proc_read_text(int pid, const char *item, char *buf, size_t size); diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c new file mode 100644 index 000000000000..2bfddb6d6d3b --- /dev/null +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -0,0 +1,851 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdbool.h> +#include <linux/limits.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <poll.h> +#include <stdlib.h> +#include <sys/inotify.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +#define DEBUG +#ifdef DEBUG +#define debug(args...) fprintf(stderr, args) +#else +#define debug(args...) +#endif + +/* + * Check if the cgroup is frozen by looking at the cgroup.events::frozen value. + */ +static int cg_check_frozen(const char *cgroup, bool frozen) +{ + if (frozen) { + if (cg_read_strstr(cgroup, "cgroup.events", "frozen 1") != 0) { + debug("Cgroup %s isn't frozen\n", cgroup); + return -1; + } + } else { + /* + * Check the cgroup.events::frozen value. + */ + if (cg_read_strstr(cgroup, "cgroup.events", "frozen 0") != 0) { + debug("Cgroup %s is frozen\n", cgroup); + return -1; + } + } + + return 0; +} + +/* + * Freeze the given cgroup. + */ +static int cg_freeze_nowait(const char *cgroup, bool freeze) +{ + return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0"); +} + +/* + * Prepare for waiting on cgroup.events file. + */ +static int cg_prepare_for_wait(const char *cgroup) +{ + int fd, ret = -1; + + fd = inotify_init1(0); + if (fd == -1) { + debug("Error: inotify_init1() failed\n"); + return fd; + } + + ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"), + IN_MODIFY); + if (ret == -1) { + debug("Error: inotify_add_watch() failed\n"); + close(fd); + } + + return fd; +} + +/* + * Wait for an event. If there are no events for 10 seconds, + * treat this an error. + */ +static int cg_wait_for(int fd) +{ + int ret = -1; + struct pollfd fds = { + .fd = fd, + .events = POLLIN, + }; + + while (true) { + ret = poll(&fds, 1, 10000); + + if (ret == -1) { + if (errno == EINTR) + continue; + debug("Error: poll() failed\n"); + break; + } + + if (ret > 0 && fds.revents & POLLIN) { + ret = 0; + break; + } + } + + return ret; +} + +/* + * Attach a task to the given cgroup and wait for a cgroup frozen event. + * All transient events (e.g. populated) are ignored. + */ +static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid, + bool frozen) +{ + int fd, ret = -1; + int attempts; + + fd = cg_prepare_for_wait(cgroup); + if (fd < 0) + return fd; + + ret = cg_enter(cgroup, pid); + if (ret) + goto out; + + for (attempts = 0; attempts < 10; attempts++) { + ret = cg_wait_for(fd); + if (ret) + break; + + ret = cg_check_frozen(cgroup, frozen); + if (ret) + continue; + } + +out: + close(fd); + return ret; +} + +/* + * Freeze the given cgroup and wait for the inotify signal. + * If there are no events in 10 seconds, treat this as an error. + * Then check that the cgroup is in the desired state. + */ +static int cg_freeze_wait(const char *cgroup, bool freeze) +{ + int fd, ret = -1; + + fd = cg_prepare_for_wait(cgroup); + if (fd < 0) + return fd; + + ret = cg_freeze_nowait(cgroup, freeze); + if (ret) { + debug("Error: cg_freeze_nowait() failed\n"); + goto out; + } + + ret = cg_wait_for(fd); + if (ret) + goto out; + + ret = cg_check_frozen(cgroup, freeze); +out: + close(fd); + return ret; +} + +/* + * A simple process running in a sleep loop until being + * re-parented. + */ +static int child_fn(const char *cgroup, void *arg) +{ + int ppid = getppid(); + + while (getppid() == ppid) + usleep(1000); + + return getppid() == ppid; +} + +/* + * A simple test for the cgroup freezer: populated the cgroup with 100 + * running processes and freeze it. Then unfreeze it. Then it kills all + * processes and destroys the cgroup. + */ +static int test_cgfreezer_simple(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + int i; + + cgroup = cg_name(root, "cg_test_simple"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + for (i = 0; i < 100; i++) + cg_run_nowait(cgroup, child_fn, NULL); + + if (cg_wait_for_proc_count(cgroup, 100)) + goto cleanup; + + if (cg_check_frozen(cgroup, false)) + goto cleanup; + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + if (cg_freeze_wait(cgroup, false)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * The test creates the following hierarchy: + * A + * / / \ \ + * B E I K + * /\ | + * C D F + * | + * G + * | + * H + * + * with a process in C, H and 3 processes in K. + * Then it tries to freeze and unfreeze the whole tree. + */ +static int test_cgfreezer_tree(const char *root) +{ + char *cgroup[10] = {0}; + int ret = KSFT_FAIL; + int i; + + cgroup[0] = cg_name(root, "cg_test_tree_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(cgroup[0], "B"); + if (!cgroup[1]) + goto cleanup; + + cgroup[2] = cg_name(cgroup[1], "C"); + if (!cgroup[2]) + goto cleanup; + + cgroup[3] = cg_name(cgroup[1], "D"); + if (!cgroup[3]) + goto cleanup; + + cgroup[4] = cg_name(cgroup[0], "E"); + if (!cgroup[4]) + goto cleanup; + + cgroup[5] = cg_name(cgroup[4], "F"); + if (!cgroup[5]) + goto cleanup; + + cgroup[6] = cg_name(cgroup[5], "G"); + if (!cgroup[6]) + goto cleanup; + + cgroup[7] = cg_name(cgroup[6], "H"); + if (!cgroup[7]) + goto cleanup; + + cgroup[8] = cg_name(cgroup[0], "I"); + if (!cgroup[8]) + goto cleanup; + + cgroup[9] = cg_name(cgroup[0], "K"); + if (!cgroup[9]) + goto cleanup; + + for (i = 0; i < 10; i++) + if (cg_create(cgroup[i])) + goto cleanup; + + cg_run_nowait(cgroup[2], child_fn, NULL); + cg_run_nowait(cgroup[7], child_fn, NULL); + cg_run_nowait(cgroup[9], child_fn, NULL); + cg_run_nowait(cgroup[9], child_fn, NULL); + cg_run_nowait(cgroup[9], child_fn, NULL); + + /* + * Wait until all child processes will enter + * corresponding cgroups. + */ + + if (cg_wait_for_proc_count(cgroup[2], 1) || + cg_wait_for_proc_count(cgroup[7], 1) || + cg_wait_for_proc_count(cgroup[9], 3)) + goto cleanup; + + /* + * Freeze B. + */ + if (cg_freeze_wait(cgroup[1], true)) + goto cleanup; + + /* + * Freeze F. + */ + if (cg_freeze_wait(cgroup[5], true)) + goto cleanup; + + /* + * Freeze G. + */ + if (cg_freeze_wait(cgroup[6], true)) + goto cleanup; + + /* + * Check that A and E are not frozen. + */ + if (cg_check_frozen(cgroup[0], false)) + goto cleanup; + + if (cg_check_frozen(cgroup[4], false)) + goto cleanup; + + /* + * Freeze A. Check that A, B and E are frozen. + */ + if (cg_freeze_wait(cgroup[0], true)) + goto cleanup; + + if (cg_check_frozen(cgroup[1], true)) + goto cleanup; + + if (cg_check_frozen(cgroup[4], true)) + goto cleanup; + + /* + * Unfreeze B, F and G + */ + if (cg_freeze_nowait(cgroup[1], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[5], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[6], false)) + goto cleanup; + + /* + * Check that C and H are still frozen. + */ + if (cg_check_frozen(cgroup[2], true)) + goto cleanup; + + if (cg_check_frozen(cgroup[7], true)) + goto cleanup; + + /* + * Unfreeze A. Check that A, C and K are not frozen. + */ + if (cg_freeze_wait(cgroup[0], false)) + goto cleanup; + + if (cg_check_frozen(cgroup[2], false)) + goto cleanup; + + if (cg_check_frozen(cgroup[9], false)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (i = 9; i >= 0 && cgroup[i]; i--) { + cg_destroy(cgroup[i]); + free(cgroup[i]); + } + + return ret; +} + +/* + * A fork bomb emulator. + */ +static int forkbomb_fn(const char *cgroup, void *arg) +{ + int ppid; + + fork(); + fork(); + + ppid = getppid(); + + while (getppid() == ppid) + usleep(1000); + + return getppid() == ppid; +} + +/* + * The test runs a fork bomb in a cgroup and tries to freeze it. + * Then it kills all processes and checks that cgroup isn't populated + * anymore. + */ +static int test_cgfreezer_forkbomb(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + + cgroup = cg_name(root, "cg_forkbomb_test"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + cg_run_nowait(cgroup, forkbomb_fn, NULL); + + usleep(100000); + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + if (cg_killall(cgroup)) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup, 0)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * The test creates two nested cgroups, freezes the parent + * and removes the child. Then it checks that the parent cgroup + * remains frozen and it's possible to create a new child + * without unfreezing. The new child is frozen too. + */ +static int test_cgfreezer_rmdir(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child = NULL; + + parent = cg_name(root, "cg_test_rmdir_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_rmdir_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_freeze_wait(parent, true)) + goto cleanup; + + if (cg_destroy(child)) + goto cleanup; + + if (cg_check_frozen(parent, true)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* + * The test creates two cgroups: A and B, runs a process in A + * and performs several migrations: + * 1) A (running) -> B (frozen) + * 2) B (frozen) -> A (running) + * 3) A (frozen) -> B (frozen) + * + * On each step it checks the actual state of both cgroups. + */ +static int test_cgfreezer_migrate(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup[2] = {0}; + int pid; + + cgroup[0] = cg_name(root, "cg_test_migrate_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(root, "cg_test_migrate_B"); + if (!cgroup[1]) + goto cleanup; + + if (cg_create(cgroup[0])) + goto cleanup; + + if (cg_create(cgroup[1])) + goto cleanup; + + pid = cg_run_nowait(cgroup[0], child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup[0], 1)) + goto cleanup; + + /* + * Migrate from A (running) to B (frozen) + */ + if (cg_freeze_wait(cgroup[1], true)) + goto cleanup; + + if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true)) + goto cleanup; + + if (cg_check_frozen(cgroup[0], false)) + goto cleanup; + + /* + * Migrate from B (frozen) to A (running) + */ + if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false)) + goto cleanup; + + if (cg_check_frozen(cgroup[1], true)) + goto cleanup; + + /* + * Migrate from A (frozen) to B (frozen) + */ + if (cg_freeze_wait(cgroup[0], true)) + goto cleanup; + + if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true)) + goto cleanup; + + if (cg_check_frozen(cgroup[0], true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup[0]) + cg_destroy(cgroup[0]); + free(cgroup[0]); + if (cgroup[1]) + cg_destroy(cgroup[1]); + free(cgroup[1]); + return ret; +} + +/* + * The test checks that ptrace works with a tracing process in a frozen cgroup. + */ +static int test_cgfreezer_ptrace(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + siginfo_t siginfo; + int pid; + + cgroup = cg_name(root, "cg_test_ptrace"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + pid = cg_run_nowait(cgroup, child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup, 1)) + goto cleanup; + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + if (ptrace(PTRACE_SEIZE, pid, NULL, NULL)) + goto cleanup; + + if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) + goto cleanup; + + waitpid(pid, NULL, 0); + + /* + * Cgroup has to remain frozen, however the test task + * is in traced state. + */ + if (cg_check_frozen(cgroup, true)) + goto cleanup; + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) + goto cleanup; + + if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) + goto cleanup; + + if (cg_check_frozen(cgroup, true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Check if the process is stopped. + */ +static int proc_check_stopped(int pid) +{ + char buf[PAGE_SIZE]; + int len; + + len = proc_read_text(pid, "stat", buf, sizeof(buf)); + if (len == -1) { + debug("Can't get %d stat\n", pid); + return -1; + } + + if (strstr(buf, "(test_freezer) T ") == NULL) { + debug("Process %d in the unexpected state: %s\n", pid, buf); + return -1; + } + + return 0; +} + +/* + * Test that it's possible to freeze a cgroup with a stopped process. + */ +static int test_cgfreezer_stopped(const char *root) +{ + int pid, ret = KSFT_FAIL; + char *cgroup = NULL; + + cgroup = cg_name(root, "cg_test_stopped"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + pid = cg_run_nowait(cgroup, child_fn, NULL); + + if (cg_wait_for_proc_count(cgroup, 1)) + goto cleanup; + + if (kill(pid, SIGSTOP)) + goto cleanup; + + if (cg_check_frozen(cgroup, false)) + goto cleanup; + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + if (cg_freeze_wait(cgroup, false)) + goto cleanup; + + if (proc_check_stopped(pid)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that it's possible to freeze a cgroup with a ptraced process. + */ +static int test_cgfreezer_ptraced(const char *root) +{ + int pid, ret = KSFT_FAIL; + char *cgroup = NULL; + siginfo_t siginfo; + + cgroup = cg_name(root, "cg_test_ptraced"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + pid = cg_run_nowait(cgroup, child_fn, NULL); + + if (cg_wait_for_proc_count(cgroup, 1)) + goto cleanup; + + if (ptrace(PTRACE_SEIZE, pid, NULL, NULL)) + goto cleanup; + + if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) + goto cleanup; + + waitpid(pid, NULL, 0); + + if (cg_check_frozen(cgroup, false)) + goto cleanup; + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + /* + * cg_check_frozen(cgroup, true) will fail here, + * because the task in in the TRACEd state. + */ + if (cg_freeze_wait(cgroup, false)) + goto cleanup; + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) + goto cleanup; + + if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +static int vfork_fn(const char *cgroup, void *arg) +{ + int pid = vfork(); + + if (pid == 0) + while (true) + sleep(1); + + return pid; +} + +/* + * Test that it's possible to freeze a cgroup with a process, + * which called vfork() and is waiting for a child. + */ +static int test_cgfreezer_vfork(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + + cgroup = cg_name(root, "cg_test_vfork"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + cg_run_nowait(cgroup, vfork_fn, NULL); + + if (cg_wait_for_proc_count(cgroup, 2)) + goto cleanup; + + if (cg_freeze_wait(cgroup, true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +#define T(x) { x, #x } +struct cgfreezer_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cgfreezer_simple), + T(test_cgfreezer_tree), + T(test_cgfreezer_forkbomb), + T(test_cgfreezer_rmdir), + T(test_cgfreezer_migrate), + T(test_cgfreezer_ptrace), + T(test_cgfreezer_stopped), + T(test_cgfreezer_ptraced), + T(test_cgfreezer_vfork), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} -- 2.20.1

6 years, 3 months

3
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror