After modifying truncate_error_folio(), we expect memory_failure() will return 0 instead of MF_FAILED. Also, we want to make sure memory_failure() signaling function is same.
Test that memory_failure() returns 0 for guest_memfd, where .error_remove_folio() is handled by not actually truncating, and returning MF_DELAYED.
In addition, test that SIGBUS signaling behavior is not changed before and after this modification.
There are two kinds of guest memory failure injections - madvise or debugfs. When memory failure is injected using madvise, the MF_ACTION_REQUIRED flag is set, and the page is mapped and dirty, the process should get a SIGBUS. When memory is failure is injected using debugfs, the KILL_EARLY machine check memory corruption kill policy is set, and the page is mapped and dirty, the process should get a SIGBUS.
Co-developed-by: Ackerley Tng ackerleytng@google.com Signed-off-by: Ackerley Tng ackerleytng@google.com Signed-off-by: Lisa Wang wyihan@google.com --- .../testing/selftests/kvm/guest_memfd_test.c | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index e7d9aeb418d3..7bcf8d2d5d4d 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -10,6 +10,8 @@ #include <errno.h> #include <stdio.h> #include <fcntl.h> +#include <linux/prctl.h> +#include <sys/prctl.h>
#include <linux/bitmap.h> #include <linux/falloc.h> @@ -97,6 +99,171 @@ static void test_fault_overflow(int fd, size_t total_size) test_fault_sigbus(fd, total_size, total_size * 4); }
+static unsigned long addr_to_pfn(void *addr) +{ + const uint64_t pagemap_pfn_mask = BIT(54) - 1; + const uint64_t pagemap_page_present = BIT(63); + uint64_t page_info; + ssize_t n_bytes; + int pagemap_fd; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Opening pagemap should succeed."); + + n_bytes = pread(pagemap_fd, &page_info, 8, (uint64_t)addr / page_size * 8); + TEST_ASSERT(n_bytes == 8, "pread of pagemap failed. n_bytes=%ld", n_bytes); + + close(pagemap_fd); + + TEST_ASSERT(page_info & pagemap_page_present, "The page for addr should be present"); + return page_info & pagemap_pfn_mask; +} + +static void write_memory_failure(unsigned long pfn, bool mark, int return_code) +{ + char path[PATH_MAX]; + char *filename; + char buf[20]; + int ret; + int len; + int fd; + + filename = mark ? "corrupt-pfn" : "unpoison-pfn"; + snprintf(path, PATH_MAX, "/sys/kernel/debug/hwpoison/%s", filename); + + fd = open(path, O_WRONLY); + TEST_ASSERT(fd > 0, "Failed to open %s.", path); + + len = snprintf(buf, sizeof(buf), "0x%lx\n", pfn); + if (len < 0 || (unsigned int)len > sizeof(buf)) + TEST_ASSERT(0, "snprintf failed or truncated."); + + ret = write(fd, buf, len); + if (return_code == 0) { + /* + * If the memory_failure() returns 0, write() should be successful, + * which returns how many bytes it writes. + */ + TEST_ASSERT(ret > 0, "Writing memory failure (path: %s) failed: %s", path, + strerror(errno)); + } else { + TEST_ASSERT_EQ(ret, -1); + /* errno is memory_failure() return code. */ + TEST_ASSERT_EQ(errno, return_code); + } + + close(fd); +} + +static void mark_memory_failure(unsigned long pfn, int return_code) +{ + write_memory_failure(pfn, true, return_code); +} + +static void unmark_memory_failure(unsigned long pfn, int return_code) +{ + write_memory_failure(pfn, false, return_code); +} + +enum memory_failure_injection_method { + MF_INJECT_DEBUGFS, + MF_INJECT_MADVISE, +}; + +static void do_test_memory_failure(int fd, size_t total_size, + enum memory_failure_injection_method method, int kill_config, + bool map_page, bool dirty_page, bool sigbus_expected, + int return_code) +{ + unsigned long memory_failure_pfn; + char *memory_failure_addr; + char *mem; + int ret; + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + memory_failure_addr = mem + page_size; + if (dirty_page) + *memory_failure_addr = 'A'; + else + READ_ONCE(*memory_failure_addr); + + /* Fault in page to read pfn, then unmap page for testing if needed. */ + memory_failure_pfn = addr_to_pfn(memory_failure_addr); + if (!map_page) + madvise(memory_failure_addr, page_size, MADV_DONTNEED); + + ret = prctl(PR_MCE_KILL, PR_MCE_KILL_SET, kill_config, 0, 0); + TEST_ASSERT_EQ(ret, 0); + + ret = 0; + switch (method) { + case MF_INJECT_DEBUGFS: { + /* DEBUGFS injection handles return_code test inside the mark_memory_failure(). */ + if (sigbus_expected) + TEST_EXPECT_SIGBUS(mark_memory_failure(memory_failure_pfn, return_code)); + else + mark_memory_failure(memory_failure_pfn, return_code); + break; + } + case MF_INJECT_MADVISE: { + /* + * MADV_HWPOISON uses get_user_pages() so the page will always + * be faulted in at the point of memory_failure() + */ + if (sigbus_expected) + TEST_EXPECT_SIGBUS(ret = madvise(memory_failure_addr, + page_size, MADV_HWPOISON)); + else + ret = madvise(memory_failure_addr, page_size, MADV_HWPOISON); + + if (return_code == 0) + TEST_ASSERT(ret == return_code, "Memory failure failed. Errno: %s", + strerror(errno)); + else { + /* errno is memory_failure() return code. */ + TEST_ASSERT_EQ(errno, return_code); + } + break; + } + default: + TEST_FAIL("Unhandled memory failure injection method %d.", method); + } + + TEST_EXPECT_SIGBUS(READ_ONCE(*memory_failure_addr)); + TEST_EXPECT_SIGBUS(*memory_failure_addr = 'A'); + + ret = munmap(mem, total_size); + TEST_ASSERT(!ret, "munmap() should succeed."); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + total_size); + TEST_ASSERT(!ret, "Truncate the entire file (cleanup) should succeed."); + + ret = prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_DEFAULT, 0, 0); + TEST_ASSERT_EQ(ret, 0); + + unmark_memory_failure(memory_failure_pfn, 0); +} + +static void test_memory_failure(int fd, size_t total_size) +{ + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_EARLY, true, true, true, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_EARLY, true, false, false, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_EARLY, false, true, false, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_LATE, true, true, false, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_LATE, true, false, false, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_DEBUGFS, PR_MCE_KILL_LATE, false, true, false, 0); + /* + * If madvise() is used to inject errors, memory_failure() handling is invoked with the + * MF_ACTION_REQUIRED flag set, aligned with memory failure handling for a consumed memory + * error, where the machine check memory corruption kill policy is ignored. Hence, testing with + * PR_MCE_KILL_DEFAULT covers all cases. + */ + do_test_memory_failure(fd, total_size, MF_INJECT_MADVISE, PR_MCE_KILL_DEFAULT, true, true, true, 0); + do_test_memory_failure(fd, total_size, MF_INJECT_MADVISE, PR_MCE_KILL_DEFAULT, true, false, false, 0); +} + static void test_fault_private(int fd, size_t total_size) { test_fault_sigbus(fd, 0, total_size); @@ -273,6 +440,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { gmem_test(mmap_supported, vm, flags); gmem_test(fault_overflow, vm, flags); + gmem_test(memory_failure, vm, flags); } else { gmem_test(fault_private, vm, flags); }