The kselftest framework uses the string logged when a test result is reported as the unique identifier for that test, using it to track results between runs. The gup_longterm test completely fails to follow this pattern: it runs a single test function repeatedly with various parameters, but each result is reported with an error-message string that is fixed between runs.
Since the code already logs a unique name for each test before it starts, refactor to also print that name into a buffer and then use it as the test result. This isn't especially pretty; really this test could use a more substantial cleanup.
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 tools/testing/selftests/mm/gup_longterm.c | 163 ++++++++++++++++++++----------
 1 file changed, 107 insertions(+), 56 deletions(-)

diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 21595b20bbc3..a849537f9372 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -35,6 +35,8 @@ static int nr_hugetlbsizes;
 static size_t hugetlbsizes[10];
 static int gup_fd;
 
+static char test_name[1024];
+
 static __fsword_t get_fs_type(int fd)
 {
 	struct statfs fs;
@@ -93,33 +95,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 	__fsword_t fs_type = get_fs_type(fd);
 	bool should_work;
 	char *mem;
+	int result = KSFT_PASS;
 	int ret;
 
+	if (fd < 0) {
+		result = KSFT_FAIL;
+		goto report;
+	}
+
 	if (ftruncate(fd, size)) {
 		if (errno == ENOENT) {
 			skip_test_dodgy_fs("ftruncate()");
 		} else {
-			ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
+			ksft_print_msg("ftruncate() failed (%s)\n",
+				       strerror(errno));
+			result = KSFT_FAIL;
+			goto report;
 		}
 		return;
 	}
 
 	if (fallocate(fd, 0, 0, size)) {
-		if (size == pagesize)
-			ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
-		else
-			ksft_test_result_skip("need more free huge pages\n");
-		return;
+		if (size == pagesize) {
+			ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
+		} else {
+			ksft_print_msg("need more free huge pages\n");
+			result = KSFT_SKIP;
+		}
+		goto report;
 	}
 
 	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
 	if (mem == MAP_FAILED) {
-		if (size == pagesize || shared)
-			ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
-		else
-			ksft_test_result_skip("need more free huge pages\n");
-		return;
+		if (size == pagesize || shared) {
+			ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
+		} else {
+			ksft_print_msg("need more free huge pages\n");
+			result = KSFT_SKIP;
+		}
+		goto report;
 	}
 
 	/* Fault in the page such that GUP-fast can pin it directly. */
@@ -134,7 +151,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 */
 		ret = mprotect(mem, size, PROT_READ);
 		if (ret) {
-			ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
+			ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
 			goto munmap;
 		}
 		/* FALLTHROUGH */
@@ -147,12 +165,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 			type == TEST_TYPE_RW_FAST;
 
 		if (gup_fd < 0) {
-			ksft_test_result_skip("gup_test not available\n");
+			ksft_print_msg("gup_test not available\n");
+			result = KSFT_SKIP;
 			break;
 		}
 
 		if (rw && shared && fs_is_unknown(fs_type)) {
-			ksft_test_result_skip("Unknown filesystem\n");
+			ksft_print_msg("Unknown filesystem\n");
+			result = KSFT_SKIP;
 			return;
 		}
 		/*
@@ -169,14 +189,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
 		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
 		if (ret && errno == EINVAL) {
-			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+			ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+			result = KSFT_SKIP;
 			break;
 		} else if (ret && errno == EFAULT) {
-			ksft_test_result(!should_work, "Should have failed\n");
+			if (should_work)
+				result = KSFT_FAIL;
+			else
+				result = KSFT_PASS;
 			break;
 		} else if (ret) {
-			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
-					      strerror(errno));
+			ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+				       strerror(errno));
+			result = KSFT_FAIL;
			break;
 		}
 
@@ -189,7 +214,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 * some previously unsupported filesystems, we might want to
 		 * perform some additional tests for possible data corruptions.
 		 */
-		ksft_test_result(should_work, "Should have worked\n");
+		if (should_work)
+			result = KSFT_PASS;
+		else
+			result = KSFT_FAIL;
 		break;
 	}
 #ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -199,8 +227,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 
 		/* io_uring always pins pages writable. */
 		if (shared && fs_is_unknown(fs_type)) {
-			ksft_test_result_skip("Unknown filesystem\n");
-			return;
+			ksft_print_msg("Unknown filesystem\n");
+			result = KSFT_SKIP;
+			goto report;
 		}
 		should_work = !shared ||
			fs_supports_writable_longterm_pinning(fs_type);
@@ -208,8 +237,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		/* Skip on errors, as we might just lack kernel support. */
 		ret = io_uring_queue_init(1, &ring, 0);
 		if (ret < 0) {
-			ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
-					      strerror(-ret));
+			ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+				       strerror(-ret));
+			result = KSFT_SKIP;
 			break;
 		}
 		/*
@@ -222,17 +252,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		/* Only new kernels return EFAULT. */
 		if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
			    errno == EFAULT)) {
-			ksft_test_result(!should_work, "Should have failed (%s)\n",
-					 strerror(errno));
+			if (should_work) {
+				ksft_print_msg("Should have failed (%s)\n",
+					       strerror(errno));
+				result = KSFT_FAIL;
+			} else {
+				result = KSFT_PASS;
+			}
 		} else if (ret) {
 			/*
 			 * We might just lack support or have insufficient
 			 * MEMLOCK limits.
 			 */
-			ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
-					      strerror(-ret));
+			ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+				       strerror(-ret));
+			result = KSFT_SKIP;
 		} else {
-			ksft_test_result(should_work, "Should have worked\n");
+			if (should_work) {
+				result = KSFT_PASS;
+			} else {
+				ksft_print_msg("Should have worked\n");
+				result = KSFT_FAIL;
+			}
 			io_uring_unregister_buffers(&ring);
 		}
 
@@ -246,21 +287,32 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 
 munmap:
 	munmap(mem, size);
+report:
+	ksft_test_result(result, "%s\n", test_name);
 }
 
 typedef void (*test_fn)(int fd, size_t size);
 
+static void log_test_start(const char *name, ...)
+{
+	va_list args;
+	va_start(args, name);
+
+	vsnprintf(test_name, sizeof(test_name), name, args);
+	ksft_print_msg("[RUN] %s\n", test_name);
+
+	va_end(args);
+}
+
 static void run_with_memfd(test_fn fn, const char *desc)
 {
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+	log_test_start("%s ... with memfd", desc);
 
 	fd = memfd_create("test", 0);
-	if (fd < 0) {
-		ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
-		return;
-	}
+	if (fd < 0)
+		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
 
 	fn(fd, pagesize);
 	close(fd);
@@ -271,23 +323,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
 	FILE *file;
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+	log_test_start("%s ... with tmpfile", desc);
 
 	file = tmpfile();
 	if (!file) {
-		ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
-		return;
-	}
-
-	fd = fileno(file);
-	if (fd < 0) {
-		ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
-		goto close;
+		ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+		fd = -1;
+	} else {
+		fd = fileno(file);
+		if (fd < 0) {
+			ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+		}
 	}
 
 	fn(fd, pagesize);
-close:
-	fclose(file);
+
+	if (file)
+		fclose(file);
 }
 
 static void run_with_local_tmpfile(test_fn fn, const char *desc)
@@ -295,22 +347,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
 	char filename[] = __FILE__"_tmpfile_XXXXXX";
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+	log_test_start("%s ... with local tmpfile", desc);
 
 	fd = mkstemp(filename);
-	if (fd < 0) {
-		ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
-		return;
-	}
+	if (fd < 0)
+		ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
 
 	if (unlink(filename)) {
-		ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
-		goto close;
+		ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+		close(fd);
+		fd = -1;
 	}
 
 	fn(fd, pagesize);
-close:
-	close(fd);
+
+	if (fd >= 0)
+		close(fd);
 }
 
 static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
@@ -319,15 +371,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
 	int flags = MFD_HUGETLB;
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
 		       hugetlbsize / 1024);
 
 	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
 
 	fd = memfd_create("test", flags);
 	if (fd < 0) {
-		ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
-		return;
+		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
 	}
 
 	fn(fd, hugetlbsize);
---
base-commit: 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3
change-id: 20250514-selftests-mm-gup-longterm-dups-68aa4e0fcc88
Best regards,
On 15/05/25 2:27 pm, Mark Brown wrote:
...
@@ -189,7 +214,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 * some previously unsupported filesystems, we might want to
 		 * perform some additional tests for possible data corruptions.
 		 */
-		ksft_test_result(should_work, "Should have worked\n");
+		if (should_work)
+			result = KSFT_PASS;
Missed printing "Should have worked" here.
+		else
+			result = KSFT_FAIL;
 		break;
 	}
...
On Thu, May 15, 2025 at 03:05:07PM +0530, Dev Jain wrote:
On 15/05/25 2:27 pm, Mark Brown wrote:
@@ -189,7 +214,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 * some previously unsupported filesystems, we might want to
 		 * perform some additional tests for possible data corruptions.
 		 */
-		ksft_test_result(should_work, "Should have worked\n");
+		if (should_work)
+			result = KSFT_PASS;
Missed printing "Should have worked" here.
I didn't think that output was particularly useful separate to the overall test result (which is logged on exit from the function), it's just the test result more than diagnostic information.
Please delete unneeded context from mails when replying. Doing this makes it much easier to find your reply in the message, helping ensure it won't be missed by people scrolling through the irrelevant quoted material.
On 15/05/25 3:11 pm, Mark Brown wrote:
On Thu, May 15, 2025 at 03:05:07PM +0530, Dev Jain wrote:
Missed printing "Should have worked" here.
I didn't think that output was particularly useful separate to the overall test result (which is logged on exit from the function), it's just the test result more than diagnostic information.
No hard opinion.
Please delete unneeded context from mails when replying. Doing this makes it much easier to find your reply in the message, helping ensure it won't be missed by people scrolling through the irrelevant quoted material.
You have mentioned that before, sorry my bad! I also hate it :)
On 15.05.25 10:57, Mark Brown wrote:
The kselftest framework uses the string logged when a test result is reported as the unique identifier for a test, using it to track test results between runs. The gup_longterm test completely fails to follow this pattern, it runs a single test function repeatedly with various parameters but each result report is a string logging an error message which is fixed between runs.
As the person who wrote that test (that you apparently didn't CC for some reason), what exactly is the problem with that?
We run tests. If all pass, we're happy, if one fails, we investigate.
On Fri, May 16, 2025 at 10:02:16AM +0200, David Hildenbrand wrote:
On 15.05.25 10:57, Mark Brown wrote:
The kselftest framework uses the string logged when a test result is reported as the unique identifier for a test, using it to track test results between runs. The gup_longterm test completely fails to follow this pattern, it runs a single test function repeatedly with various parameters but each result report is a string logging an error message which is fixed between runs.
As the person who wrote that test (that you apparently didn't CC for some
I just CCed whoever get_maintainers told me to CC for the patch.
reason), what exactly is the problem with that?
We run tests. If all pass, we're happy, if one fails, we investigate.
None of the tooling is able either to distinguish between the multiple tests that are being run in gup_longterm or to compare the results of multiple runs effectively. If all the tests run, they report themselves as duplicates of the same test name; if one of them starts failing the effect is that one of the duplicates disappears and an entirely new test that has never passed gets reported. If multiple tests fail it's even worse. This means that UIs displaying test results end up reporting things unclearly (Was there a regression, or was a new test that never worked added? What was the test?). Since it's difficult to track the tests between runs, tooling that does reporting of things like "This last worked in X" in the UI doesn't work properly, and tool-driven bisection of issues similarly struggles since it can't tell what's going on with any of the tests between runs.
Basically, the output is garbled and vastly less useful for people running this as a matter of routine or as part of a broader kselftest run. For example with my own automation I probably won't notice that a previously working test failed unless every single test fails, and newly added tests that never worked are a much lower priority to the point where I may never look at them depending on where they are.
If a selftest is reporting multiple tests it should report them with names that are stable and unique.
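To make the naming issue concrete, here is a minimal sketch (illustrative only, not code from the patch; the "../kselftest.h" include path and the helper functions are assumptions matching the mm selftests layout) contrasting a fixed result string with one built from a per-run test name:

/*
 * Minimal sketch contrasting the two reporting styles discussed above.
 * ksft_test_result() and friends come from the selftests' kselftest.h;
 * the test_name buffer mirrors the approach taken by the patch.
 */
#include <stdio.h>

#include "../kselftest.h"

static char test_name[1024];

static void report_fixed_string(int ok)
{
	/*
	 * Every parameterised run emits the same string, so TAP consumers
	 * see many identical "ok N Should have worked" lines and cannot
	 * match a result against the corresponding result of another run.
	 */
	ksft_test_result(ok, "Should have worked\n");
}

static void report_unique_name(int ok)
{
	/* Embedding the per-run name gives a stable, unique identifier. */
	ksft_test_result(ok, "%s\n", test_name);
}

int main(void)
{
	ksft_print_header();
	ksft_set_plan(2);

	snprintf(test_name, sizeof(test_name),
		 "R/O longterm GUP pin ... with memfd");
	report_fixed_string(1);
	report_unique_name(1);

	ksft_finished();
}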
On 16.05.25 14:29, Mark Brown wrote:
On Fri, May 16, 2025 at 10:02:16AM +0200, David Hildenbrand wrote:
On 15.05.25 10:57, Mark Brown wrote:
The kselftest framework uses the string logged when a test result is reported as the unique identifier for a test, using it to track test results between runs. The gup_longterm test completely fails to follow this pattern, it runs a single test function repeatedly with various parameters but each result report is a string logging an error message which is fixed between runs.
As the person who wrote that test (that you apparently didn't CC for some
I just CCed whoever get_maintainers told me to CC for the patch.
For the future, it's a good idea to look for the author of the problematic bits.
reason), what exactly is the problem with that?
We run tests. If all pass, we're happy, if one fails, we investigate.
None of the tooling is able to either distinguish between the multiple tests that are being run in gup_longterm, nor compare the results of multiple runs effectively. If all the tests run they report themselves as being duplicates of the same test name, if one of them starts failing the effect is that one of the duplicates disappears and we get an entirely new test that's never passed reported. If multiple tests fail it's even worse. This means that UIs displaying test results end up reporting things unclearly (Was there a regression or was a new test that never worked added? What was the test?). Since it's difficult to track the tests between runs tooling that does reporting of things like "This last worked in X" in the UI doesn't work properly, and tool driven bisection of issues similarly struggles since it can't tell what's going on with any of the tests between runs.
Okay, so this is purely to make tooling happy. Humans are smart enough to figure it out.
What mechanism do we have in place to reliably prevent that from happening? And is this at least documented somewhere ("unique identifier for a test")?
I guess when using kselftest_harness, we get a single identifier per tests (and much less output) just automatically.
Basically, the output is garbled and vastly less useful for people
running this as a matter of routine or as part of a broader kselftest run. For example with my own automation I probably won't notice that a previously working test failed unless every single test fails, and newly added tests that never worked are a much lower priority to the point where I may never look at them depending on where they are.
If a selftest is reporting multiple tests it should report them with names that are stable and unique.
I'm afraid we have other such tests that report duplicate conditions. cow.c is likely another candidate (written by me ;) ).
Probably, the affected tests should be converted to use kselftest_harness, where we just report the result for a single tests, and not the individual assertions.
That would reduce the output of these tests drastically as well.
So that is likely the way to clean this up properly and make tooling happy?
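For reference, a conversion along those lines might look roughly like the sketch below. It is a hypothetical fragment, not an actual conversion of gup_longterm.c: the fixture name, the trivial test body and the include path are assumptions, and it only illustrates how the harness reports one uniquely named TAP result per TEST_F() rather than one per logged condition.

/*
 * Hypothetical kselftest_harness sketch; the fixture and test body are
 * illustrative only.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

#include "../kselftest_harness.h"

FIXTURE(memfd_page)
{
	int fd;
};

FIXTURE_SETUP(memfd_page)
{
	self->fd = memfd_create("test", 0);
	ASSERT_GE(self->fd, 0);
	ASSERT_EQ(ftruncate(self->fd, getpagesize()), 0);
}

FIXTURE_TEARDOWN(memfd_page)
{
	close(self->fd);
}

TEST_F(memfd_page, ro_longterm_pin)
{
	/*
	 * The harness emits a single TAP line named
	 * "memfd_page.ro_longterm_pin", so the result identifier is unique
	 * and stable by construction.  The real pinning logic (the gup_test
	 * ioctl or io_uring buffer registration) would go here.
	 */
	char *mem = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
			 MAP_SHARED, self->fd, 0);

	ASSERT_NE(mem, MAP_FAILED);
	munmap(mem, getpagesize());
}

TEST_HARNESS_MAIN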
On Fri, May 16, 2025 at 02:55:24PM +0200, David Hildenbrand wrote:
On 16.05.25 14:29, Mark Brown wrote:
On Fri, May 16, 2025 at 10:02:16AM +0200, David Hildenbrand wrote:
reason), what exactly is the problem with that?
We run tests. If all pass, we're happy, if one fails, we investigate.
None of the tooling is able to either distinguish between the multiple tests that are being run in gup_longterm, nor compare the results of multiple runs effectively. If all the tests run they report themselves
Okay, so this is purely to make tooling happy. Humans are smart enough to figure it out.
Not just the tools, humans interact with the selftests and their results via tools (unless I'm actively working on something and running the specific test for that thing I'm unlikely to ever directly look at results...).
What mechanism do we have in place to reliably prevent that from happening? And is this at least documented somewhere ("unique identifier for a test")>
It comes from TAP, I can't see a direct reference to anything in the kernel documentation. The main thing enforcing this is people running tooling noticing bad output, unfortunately.
I guess when using kselftest_harness, we get a single identifier per tests (and much less output) just automatically.
Nothing stops something using the harness from logging during the test, the harness tests actually tend to be a little chattier than a lot of the things written directly to kselftest.h as they log the start and end of tests as well as the actual TAP result line as standard.
If a selftest is reporting multiple tests it should report them with names that are stable and unique.
I'm afraid we have other such tests that report duplicate conditions. cow.c is likely another candidate (written by me ;) ).
That one's not come up for me (this was one of four different patches for mm selftests I sent the other day cleaning up duplicate test names).
Probably, the affected tests should be converted to use kselftest_harness, where we just report the result for a single tests, and not the individual assertions.
That would reduce the output of these tests drastically as well.
So that is likely the way to clean this up properly and make tooling happy?
That'd certainly work, though doing that is more surgery on the test than I personally have the time/enthusiasm for right now.
Having the tests be chatty isn't a terrible thing, so long as they're not so chatty that they cause execution time problems on a serial console - it can be useful if they do blow up and you're looking at a failure on a machine you only have automated access to.
On 16.05.25 15:09, Mark Brown wrote:
On Fri, May 16, 2025 at 02:55:24PM +0200, David Hildenbrand wrote:
On 16.05.25 14:29, Mark Brown wrote:
On Fri, May 16, 2025 at 10:02:16AM +0200, David Hildenbrand wrote:
reason), what exactly is the problem with that?
We run tests. If all pass, we're happy, if one fails, we investigate.
None of the tooling is able to either distinguish between the multiple tests that are being run in gup_longterm, nor compare the results of multiple runs effectively. If all the tests run they report themselves
Okay, so this is purely to make tooling happy. Humans are smart enough to figure it out.
Not just the tools, humans interact with the selftests and their results via tools (unless I'm actively working on something and running the specific test for that thing I'm unlikely to ever directly look at results...).
Yes, that makes sense.
What mechanism do we have in place to reliably prevent that from happening? And is this at least documented somewhere ("unique identifier for a test")?
It comes from TAP, I can't see a direct reference to anything in the kernel documentation. The main thing enforcing this is people running tooling noticing bad output, unfortunately.
:(
I guess when using kselftest_harness, we get a single identifier per tests (and much less output) just automatically.
Nothing stops something using the harness from logging during the test, the harness tests actually tend to be a little chattier than a lot of the things written directly to kselftest.h as they log the start and end of tests as well as the actual TAP result line as standard.
If a selftest is reporting multiple tests it should report them with names that are stable and unique.
I'm afraid we have other such tests that report duplicate conditions. cow.c is likely another candidate (written by me ;) ).
That one's not come up for me (this was one of four different patches for mm selftests I sent the other day cleaning up duplicate test names).
$ sudo ./cow
TAP version 13
# [INFO] detected PMD size: 2048 KiB
# [INFO] detected THP size: 16 KiB
# [INFO] detected THP size: 32 KiB
# [INFO] detected THP size: 64 KiB
# [INFO] detected THP size: 128 KiB
# [INFO] detected THP size: 256 KiB
# [INFO] detected THP size: 512 KiB
# [INFO] detected THP size: 1024 KiB
# [INFO] detected THP size: 2048 KiB
# [INFO] detected hugetlb page size: 2048 KiB
# [INFO] detected hugetlb page size: 1048576 KiB
# [INFO] huge zeropage is enabled
1..778
# [INFO] Anonymous memory tests in private mappings
# [RUN] Basic COW after fork() ... with base page
ok 1 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped out base page
ok 2 No leak from parent into child
# [RUN] Basic COW after fork() ... with PTE-mapped THP (16 kB)
ok 3 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped-out, PTE-mapped THP (16 kB)
ok 4 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of THP (16 kB)
ok 5 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of swapped-out THP (16 kB)
ok 6 No leak from parent into child
# [RUN] Basic COW after fork() ... with partially mremap()'ed THP (16 kB)
ok 7 No leak from parent into child
# [RUN] Basic COW after fork() ... with partially shared THP (16 kB)
ok 8 No leak from parent into child
# [RUN] Basic COW after fork() ... with PTE-mapped THP (32 kB)
ok 9 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped-out, PTE-mapped THP (32 kB)
ok 10 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of THP (32 kB)
ok 11 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of swapped-out THP (32 kB)
ok 12 No leak from parent into child
# [RUN] Basic COW after fork() ... with partially mremap()'ed THP (32 kB)
ok 13 No leak from parent into child
# [RUN] Basic COW after fork() ... with partially shared THP (32 kB)
ok 14 No leak from parent into child
# [RUN] Basic COW after fork() ... with PTE-mapped THP (64 kB)
ok 15 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped-out, PTE-mapped THP (64 kB)
ok 16 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of THP (64 kB)
ok 17 No leak from parent into child
# [RUN] Basic COW after fork() ... with single PTE of swapped-out THP (64 kB)
ok 18 No leak from parent into child
# [RUN] Basic COW after fork() ... with partially mremap()'ed THP (64 kB)
ok 19 No leak from parent into child
...
Aren't the duplicate "No leak from parent into child" the problematic bits? But maybe I am getting it wrong, what needs to be "unique" :)
Probably, the affected tests should be converted to use kselftest_harness, where we just report the result for a single tests, and not the individual assertions.
That would reduce the output of these tests drastically as well.
So that is likely the way to clean this up properly and make tooling happy?
That'd certainly work, though doing that is more surgery on the test than I personally have the time/enthusiasm for right now.
Same over here.
But probably if we touch it, we should just clean it up right away. Well, if we decide that that is the right cleanup. (you mention something like that in your patch description :) )
On Fri, May 16, 2025 at 04:12:08PM +0200, David Hildenbrand wrote:
On 16.05.25 15:09, Mark Brown wrote:
I'm afraid we have other such tests that report duplicate conditions. cow.c is likely another candidate (written by me ;) ).
That one's not come up for me (this was one of four different patches for mm selftests I sent the other day cleaning up duplicate test names).
$ sudo ./cow
...
1..778
# [INFO] Anonymous memory tests in private mappings
# [RUN] Basic COW after fork() ... with base page
ok 1 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped out base page
ok 2 No leak from parent into child
Aren't the duplicate "No leak from parent into child" the problematic bits? But maybe I am getting it wrong, what needs to be "unique" :)
Ah, yes - that's got the same issue. I'm not running that program one way or another, it's not immediately clear to me why not - I can't see any sign of it being invoked by the runner script but I also can't see anything that I'd expect to stop that happening. I'll have to have a poke at it, thanks for flagging that.
[Converting to kselftest_harness]
That'd certainly work, though doing that is more surgery on the test than I personally have the time/enthusiasm for right now.
Same over here.
But probably if we touch it, we should just clean it up right away. Well, if we decide that that is the right cleanup. (you mention something like that in your patch description :)
OTOH there's something to be said for just making incremental improvements in the tests where we can, they tend not to get huge amounts of love in general which means perfect can very much be the enemy of good. If there's some immediate prospect of someone doing a bigger refactoring then that'd be amazing, but if not then it seems useful to make things play better with the automation for now.
On 16.05.25 20:07, Mark Brown wrote:
On Fri, May 16, 2025 at 04:12:08PM +0200, David Hildenbrand wrote:
On 16.05.25 15:09, Mark Brown wrote:
I'm afraid we have other such tests that report duplicate conditions. cow.c is likely another candidate (written by me ;) ).
That one's not come up for me (this was one of four different patches for mm selftests I sent the other day cleaning up duplicate test names).
$ sudo ./cow
...
1..778
# [INFO] Anonymous memory tests in private mappings
# [RUN] Basic COW after fork() ... with base page
ok 1 No leak from parent into child
# [RUN] Basic COW after fork() ... with swapped out base page
ok 2 No leak from parent into child
Aren't the duplicate "No leak from parent into child" the problematic bits? But maybe I am getting it wrong, what needs to be "unique" :)
Ah, yes - that's got the same issue. I'm not running that program one way or another, it's not immediately clear to me why not - I can't see any sign of it being invoked by the runner script but I also can't see anything that I'd expect to stop that happening. I'll have to have a poke at it, thanks for flagging that.
[Converting to kselftest_harness]
That'd certainly work, though doing that is more surgery on the test than I personally have the time/enthusiasm for right now.
Same over here.
But probably if we touch it, we should just clean it up right away. Well, if we decide that that is the right cleanup. (you mention something like that in your patch description :)
OTOH there's something to be said for just making incremental improvements in the tests where we can, they tend not to get huge amounts of love in general which means perfect can very much be the enemy of good. If there's some immediate prospect of someone doing a bigger refactoring then that'd be amazing, but if not then it seems useful to make things play better with the automation for now.
I would agree if it would be a handful of small changes.
But here we are already at
1 file changed, 107 insertions(+), 56 deletions(-)
On Mon, May 19, 2025 at 03:28:47PM +0200, David Hildenbrand wrote:
On 16.05.25 20:07, Mark Brown wrote:
On Fri, May 16, 2025 at 04:12:08PM +0200, David Hildenbrand wrote:
[Converting to kselftest_harness]
That'd certainly work, though doing that is more surgery on the test than I personally have the time/enthusiasm for right now.
Same over here.
But probably if we touch it, we should just clean it up right away. Well, if we decide that that is the right cleanup. (you mention something like that in your patch description :)
OTOH there's something to be said for just making incremental improvements in the tests where we can, they tend not to get huge amounts of love in general which means perfect can very much be the enemy of good. If there's some immediate prospect of someone doing a bigger refactoring then that'd be amazing, but if not then it seems useful to make things play better with the automation for now.
I would agree if it would be a handful of small changes.
But here we are already at
1 file changed, 107 insertions(+), 56 deletions(-)
Those are pretty mechanical changes due to the amount of chat from the program rather than a more substantial reconstruction of the logic which is rather more risky for a drive by.
On Mon, May 19, 2025 at 03:28:47PM +0200, David Hildenbrand wrote:
I would agree if it would be a handful of small changes.
But here we are already at
1 file changed, 107 insertions(+), 56 deletions(-)
So, I did have a brief poke at this which confirmed my instinct that blocking a fix for this (and the other similarly structured tests like cow) seems disproportionate.
The biggest issue is the configuration of fixtures: the harness really wants the set of test variants to be fixed at compile time (see the FIXTURE_ macros), but we're covering a dynamically discovered list of huge page sizes. I'm not seeing a super tasteful way to deal with that mismatch of assumptions; the most obvious thing is to switch to having a static list of possible huge page sizes and skipping when a size isn't there, but there are an awful lot of potential huge page sizes, most of which won't appear on any given system. That'd be both ugly code and bad ergonomics for anyone actively working with the test.
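For illustration, the static-list approach described above could look something like the sketch below. Everything here is hypothetical (the fixture and variant names, and the choice of just two sizes); it only shows how FIXTURE_VARIANT fixes the parameter set at compile time, leaving a runtime SKIP for sizes the system doesn't provide.

/*
 * Hypothetical sketch of compile-time hugetlb size variants; only two of
 * the many possible sizes are listed, which is exactly the ergonomic
 * problem described above.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <linux/memfd.h>
#include <unistd.h>

#include "../kselftest_harness.h"

FIXTURE(hugetlb_memfd)
{
	int fd;
};

FIXTURE_VARIANT(hugetlb_memfd)
{
	size_t hugetlbsize;
};

/* The variant list has to be spelled out at compile time... */
FIXTURE_VARIANT_ADD(hugetlb_memfd, size_2M)
{
	.hugetlbsize = 2UL * 1024 * 1024,
};

FIXTURE_VARIANT_ADD(hugetlb_memfd, size_1G)
{
	.hugetlbsize = 1UL * 1024 * 1024 * 1024,
};

FIXTURE_SETUP(hugetlb_memfd)
{
	int flags = MFD_HUGETLB |
		    (__builtin_ctzll(variant->hugetlbsize) << MFD_HUGE_SHIFT);

	/* ...so sizes the running system doesn't offer are skipped here. */
	self->fd = memfd_create("test", flags);
	if (self->fd < 0)
		SKIP(return, "no %zu kB huge pages available",
		     variant->hugetlbsize / 1024);
}

FIXTURE_TEARDOWN(hugetlb_memfd)
{
	if (self->fd >= 0)
		close(self->fd);
}

TEST_F(hugetlb_memfd, ro_longterm_pin)
{
	/* The pinning logic from do_test() would go here. */
	ASSERT_GE(self->fd, 0);
}

TEST_HARNESS_MAIN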