We cannot use CLONE_VFORK because we also need to wait for the timeout signal.
Restore tests timeout by using the original fork() call in __run_test() but also in __TEST_F_IMPL(). Also fix a race condition when waiting for the test child process.
Because test metadata are shared between test processes, only the parent process must set the test PID (child). Otherwise, t->pid may be set to zero, leading to inconsistent error cases:
# RUN layout1.rule_on_mountpoint ... # rule_on_mountpoint: Test ended in some other way [127] # OK layout1.rule_on_mountpoint ok 20 layout1.rule_on_mountpoint
As safeguards, initialize the "status" variable with a valid exit code, and handle unknown test exits as errors.
The use of fork() introduces a new race condition in landlock/fs_test.c which seems to be specific to hostfs bind mounts, but I haven't found the root cause and it's difficult to trigger. I'll try to fix it with another patch.
Cc: Christian Brauner brauner@kernel.org Cc: Günther Noack gnoack@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Mark Brown broonie@kernel.org Cc: Shuah Khan shuah@kernel.org Cc: Will Drewry wad@chromium.org Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk Fixes: a86f18903db9 ("selftests/harness: Fix interleaved scheduling leading to race conditions") Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes") Signed-off-by: Mickaël Salaün mic@digikod.net Link: https://lore.kernel.org/r/20240621180605.834676-1-mic@digikod.net --- tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++--------- 1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f..40723a6a083f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <syscall.h> -#include <linux/sched.h>
#include "kselftest.h"
@@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif
-/* Wait for the child process to end but without sharing memory mapping. */ -static inline pid_t clone3_vfork(void) -{ - struct clone_args args = { - .flags = CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return syscall(__NR_clone3, &args, sizeof(args)); -} - /** * TH_LOG() * @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \ - child = clone3_vfork(); \ + child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. */ \ @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action; - int status; + /* + * Sets status so that WIFEXITED(status) returns true and + * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value + * should never be evaluated because of the waitpid(2) check and + * SIGALRM handling. + */ + int status = KSFT_FAIL << 8; + int child;
if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout); - waitpid(t->pid, &status, 0); + child = waitpid(t->pid, &status, 0); + if (child == -1 && errno != EINTR) { + t->exit_code = KSFT_FAIL; + fprintf(TH_LOG_STREAM, + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); + return; + } + alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL; @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else { + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name, @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic; + int child;
/* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr);
- t->pid = clone3_vfork(); - if (t->pid < 0) { + child = fork(); + if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL; - } else if (t->pid == 0) { + } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else { + t->pid = child; __wait_for_test(t); } ksft_print_msg(" %4s %s\n",
base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
I pushed it to my next branch.
Mark, Shuah, and others, please let me know if kselftest and KernelCI are better with that.
On Fri, Jun 21, 2024 at 08:06:05PM +0200, Mickaël Salaün wrote:
We cannot use CLONE_VFORK because we also need to wait for the timeout signal.
Restore tests timeout by using the original fork() call in __run_test() but also in __TEST_F_IMPL(). Also fix a race condition when waiting for the test child process.
Because test metadata are shared between test processes, only the parent process must set the test PID (child). Otherwise, t->pid may be set to zero, leading to inconsistent error cases:
# RUN layout1.rule_on_mountpoint ... # rule_on_mountpoint: Test ended in some other way [127] # OK layout1.rule_on_mountpoint ok 20 layout1.rule_on_mountpoint
As safeguards, initialize the "status" variable with a valid exit code, and handle unknown test exits as errors.
The use of fork() introduces a new race condition in landlock/fs_test.c which seems to be specific to hostfs bind mounts, but I haven't found the root cause and it's difficult to trigger. I'll try to fix it with another patch.
Cc: Christian Brauner brauner@kernel.org Cc: Günther Noack gnoack@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Mark Brown broonie@kernel.org Cc: Shuah Khan shuah@kernel.org Cc: Will Drewry wad@chromium.org Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk Fixes: a86f18903db9 ("selftests/harness: Fix interleaved scheduling leading to race conditions") Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes") Signed-off-by: Mickaël Salaün mic@digikod.net Link: https://lore.kernel.org/r/20240621180605.834676-1-mic@digikod.net
tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++--------- 1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f..40723a6a083f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <syscall.h> -#include <linux/sched.h> #include "kselftest.h" @@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif -/* Wait for the child process to end but without sharing memory mapping. */ -static inline pid_t clone3_vfork(void) -{
- struct clone_args args = {
.flags = CLONE_VFORK,
.exit_signal = SIGCHLD,
- };
- return syscall(__NR_clone3, &args, sizeof(args));
-}
/**
- TH_LOG()
@@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \
child = clone3_vfork(); \
child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. */ \
@@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action;
- int status;
- /*
* Sets status so that WIFEXITED(status) returns true and
* WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
* should never be evaluated because of the waitpid(2) check and
* SIGALRM handling.
*/
- int status = KSFT_FAIL << 8;
- int child;
if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout);
- waitpid(t->pid, &status, 0);
- child = waitpid(t->pid, &status, 0);
- if (child == -1 && errno != EINTR) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Failed to wait for PID %d (errno: %d)\n",
t->name, t->pid, errno);
return;
- }
- alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL;
@@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else {
fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name,t->exit_code = KSFT_FAIL;
@@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic;
- int child;
/* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr);
- t->pid = clone3_vfork();
- if (t->pid < 0) {
- child = fork();
- if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL;
- } else if (t->pid == 0) {
- } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else {
__wait_for_test(t); } ksft_print_msg(" %4s %s\n",t->pid = child;
base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
2.45.2
On Fri, Jun 21, 2024 at 08:06:05PM +0200, Mickaël Salaün wrote:
We cannot use CLONE_VFORK because we also need to wait for the timeout signal.
Restore tests timeout by using the original fork() call in __run_test() but also in __TEST_F_IMPL(). Also fix a race condition when waiting for the test child process.
I've given this a quick test both by trying to apply it directly and through yesterday's -next and there's *something* funky going on, the epoll suite which was one of those hanging is somehow not getting run at all although the binaries do appear to be getting built and end up in my kselftest tarball that I'm deploying. However the ptrace test which was also hanging now manages to trigger it's timeout successfully when it that happens so I think whatever is going on with epoll probably an unrelated issue, I didn't get a chance to look at it properly.
Tested-by: Mark Brown broonie@kernel.org
linux-stable-mirror@lists.linaro.org