June 2025 - Linux-kselftest-mirror

[PATCH v4] selftests: filesystems: Add functional test for the abort file in fusectl

by Chen Linxuan

This patch adds a simple functional test for the "abort" file in fusectlfs (/sys/fs/fuse/connections/ID/abort). A simple fuse daemon is added for testing. Related discussion can be found in the link below. Link: https://lore.kernel.org/all/CAOQ4uxjKFXOKQxPpxtS6G_nR0tpw95w0GiO68UcWg_OBhm… Signed-off-by: Chen Linxuan <chenlinxuan(a)uniontech.com> Acked-by: Shuah Khan <skhan(a)linuxfoundation.org> Reviewed-by: Amir Goldstein <amir73il(a)gmail.com> Co-developed-by: Miklos Szeredi <miklos(a)szeredi.hu> Reviewed-by: Miklos Szeredi <miklos(a)szeredi.hu> --- Changes in v4: - Apply patch suggested by Miklos Szeredi - Setting up a userns environment for testing - Fix an EBUSY on umount/rmdir - Link to v3: https://lore.kernel.org/all/20250610021007.2800329-2-chenlinxuan@uniontech.… Changes in v3: - Apply changes suggested by Amir Goldstein - Rename the test subdir to filesystems/fuse - Verify errno when connection is aborted - Apply changes suggested by Shuah Khan - Update commit message - Link to v2: https://lore.kernel.org/all/20250517012350.10317-2-chenlinxuan@uniontech.co… Changes in v2: - Apply changes suggested by Amir Goldstein - Check errno - Link to v1: https://lore.kernel.org/all/20250515073449.346774-2-chenlinxuan@uniontech.c… --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + .../selftests/filesystems/fuse/.gitignore | 3 + .../selftests/filesystems/fuse/Makefile | 21 +++ .../selftests/filesystems/fuse/fuse_mnt.c | 146 ++++++++++++++++++ .../selftests/filesystems/fuse/fusectl_test.c | 140 +++++++++++++++++ 6 files changed, 312 insertions(+) create mode 100644 tools/testing/selftests/filesystems/fuse/.gitignore create mode 100644 tools/testing/selftests/filesystems/fuse/Makefile create mode 100644 tools/testing/selftests/filesystems/fuse/fuse_mnt.c create mode 100644 tools/testing/selftests/filesystems/fuse/fusectl_test.c diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa163..04d90432c1841 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9901,6 +9901,7 @@ 
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git F: Documentation/filesystems/fuse* F: fs/fuse/ F: include/uapi/linux/fuse.h +F: tools/testing/selftests/filesystems/fuse/ FUTEX SUBSYSTEM M: Thomas Gleixner <tglx(a)linutronix.de> diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 339b31e6a6b59..c37a76a8ca214 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -36,6 +36,7 @@ TARGETS += filesystems/fat TARGETS += filesystems/overlayfs TARGETS += filesystems/statmount TARGETS += filesystems/mount-notify +TARGETS += filesystems/fuse TARGETS += firmware TARGETS += fpu TARGETS += ftrace diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore new file mode 100644 index 0000000000000..3e72e742d08e8 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +fuse_mnt +fusectl_test diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile new file mode 100644 index 0000000000000..612aad69a93aa --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) + +TEST_GEN_PROGS := fusectl_test +TEST_GEN_FILES := fuse_mnt + +include ../../lib.mk + +VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse +endif + +VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lfuse -pthread +endif + +$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS) +$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c new file mode 100644 index 0000000000000..d12b17f30fadc --- /dev/null +++ 
b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fusectl test file-system + * Creates a simple FUSE filesystem with a single read-write file (/test) + */ + +#define FUSE_USE_VERSION 26 + +#include <fuse.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static char *content; +static size_t content_size = 0; +static const char test_path[] = "/test"; + +static int test_getattr(const char *path, struct stat *st) +{ + memset(st, 0, sizeof(*st)); + + if (!strcmp(path, "/")) { + st->st_mode = S_IFDIR | 0755; + st->st_nlink = 2; + return 0; + } + + if (!strcmp(path, test_path)) { + st->st_mode = S_IFREG | 0664; + st->st_nlink = 1; + st->st_size = content_size; + return 0; + } + + return -ENOENT; +} + +static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + if (strcmp(path, "/")) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + filler(buf, test_path + 1, NULL, 0); + + return 0; +} + +static int test_open(const char *path, struct fuse_file_info *fi) +{ + if (strcmp(path, test_path)) + return -ENOENT; + + return 0; +} + +static int test_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (!content || content_size == 0) + return 0; + + if (offset >= content_size) + return 0; + + if (offset + size > content_size) + size = content_size - offset; + + memcpy(buf, content + offset, size); + + return size; +} + +static int test_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t new_size; + + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if(offset > content_size) + return -EINVAL; + + new_size = MAX(offset + size, content_size); 
+ + if (new_size > content_size) + content = realloc(content, new_size); + + content_size = new_size; + + if (!content) + return -ENOMEM; + + memcpy(content + offset, buf, size); + + return size; +} + +static int test_truncate(const char *path, off_t size) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (size == 0) { + free(content); + content = NULL; + content_size = 0; + return 0; + } + + content = realloc(content, size); + + if (!content) + return -ENOMEM; + + if (size > content_size) + memset(content + content_size, 0, size - content_size); + + content_size = size; + return 0; +} + +static struct fuse_operations memfd_ops = { + .getattr = test_getattr, + .readdir = test_readdir, + .open = test_open, + .read = test_read, + .write = test_write, + .truncate = test_truncate, +}; + +int main(int argc, char *argv[]) +{ + return fuse_main(argc, argv, &memfd_ops, NULL); +} diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c new file mode 100644 index 0000000000000..8d124d1cacb26 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Chen Linxuan <chenlinxuan(a)uniontech.com> + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <dirent.h> +#include <sched.h> +#include <linux/limits.h> + +#include "../../kselftest_harness.h" + +#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections" +#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX" +#define FUSE_DEVICE "/dev/fuse" +#define FUSECTL_TEST_VALUE "1" + +static void write_file(struct __test_metadata *const _metadata, + const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + + ASSERT_GE(fd, 0); + 
ASSERT_EQ(write(fd, val, len), len); + ASSERT_EQ(close(fd), 0); +} + +FIXTURE(fusectl){ + char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)]; + int connection; +}; + +FIXTURE_SETUP(fusectl) +{ + const char *fuse_mnt_prog = "./fuse_mnt"; + int status, pid; + struct stat statbuf; + uid_t uid = getuid(); + gid_t gid = getgid(); + char buf[32]; + + /* Setup userns */ + ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0); + sprintf(buf, "0 %d 1", uid); + write_file(_metadata, "/proc/self/uid_map", buf); + write_file(_metadata, "/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file(_metadata, "/proc/self/gid_map", buf); + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); + + strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT); + + if (!mkdtemp(self->fuse_mountpoint)) + SKIP(return, + "Failed to create FUSE mountpoint %s", + strerror(errno)); + + if (access(FUSECTL_MOUNTPOINT, F_OK)) + SKIP(return, + "FUSE control filesystem not mounted"); + + pid = fork(); + if (pid < 0) + SKIP(return, + "Failed to fork FUSE daemon process: %s", + strerror(errno)); + + if (pid == 0) { + execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL); + exit(errno); + } + + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + SKIP(return, + "Failed to start FUSE daemon %s", + strerror(WEXITSTATUS(status))); + } + + if (stat(self->fuse_mountpoint, &statbuf)) + SKIP(return, + "Failed to stat FUSE mountpoint %s", + strerror(errno)); + + self->connection = statbuf.st_dev; +} + +FIXTURE_TEARDOWN(fusectl) +{ + umount2(self->fuse_mountpoint, MNT_DETACH); + rmdir(self->fuse_mountpoint); +} + +TEST_F(fusectl, abort) +{ + char path_buf[PATH_MAX]; + int abort_fd, test_fd, ret; + + sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection); + + ASSERT_EQ(0, access(path_buf, F_OK)); + + abort_fd = open(path_buf, O_WRONLY); + ASSERT_GE(abort_fd, 0); + + sprintf(path_buf, "%s/test", self->fuse_mountpoint); + + test_fd = open(path_buf, 
O_RDWR); + ASSERT_GE(test_fd, 0); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, 0); + + ret = write(test_fd, "test", sizeof("test")); + ASSERT_EQ(ret, sizeof("test")); + + ret = lseek(test_fd, 0, SEEK_SET); + ASSERT_GE(ret, 0); + + ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE)); + ASSERT_GT(ret, 0); + + close(abort_fd); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOTCONN); +} + +TEST_HARNESS_MAIN -- 2.43.0

1 month, 2 weeks

5
5
0 0

[PATCH v4 00/38] Mediated vPMU 4.0 for x86

by Mingwei Zhang

With joint effort from the upstream KVM community, we have come up with the 4th version of mediated vPMU for x86. We have made the following changes on top of the previous RFC v3. v3 -> v4 - Rebase whole patchset on 6.14-rc3 base. - Address Peter's comments on Perf part. - Address Sean's comments on KVM part. * Change key word "passthrough" to "mediated" in all patches * Change static enabling to user space dynamic enabling via KVM_CAP_PMU_CAPABILITY. * Only support GLOBAL_CTRL save/restore with VMCS exec_ctrl, drop the MSR save/restore list support for GLOBAL_CTRL, thus the support of mediated vPMU is constrained to SapphireRapids and later CPUs on Intel side. * Merge some small changes into a single patch. - Address Sandipan's comment on invalid pmu pointer. - Add back "eventsel_hw" and "fixed_ctr_ctrl_hw" to avoid directly manipulating pmc->eventsel and pmu->fixed_ctr_ctrl. Testing (Intel side): - Perf-based legacy vPMU (force emulation on/off) * Kselftests pmu_counters_test, pmu_event_filter_test and vmx_pmu_caps_test pass. * KUT PMU tests pmu, pmu_lbr, pmu_pebs pass. * Basic perf counting/sampling tests in 3 scenarios, guest-only, host-only and host-guest coexistence all pass. - Mediated vPMU (force emulation on/off) * Kselftests pmu_counters_test, pmu_event_filter_test and vmx_pmu_caps_test pass. * KUT PMU tests pmu, pmu_lbr, pmu_pebs pass. * Basic perf counting/sampling tests in 3 scenarios, guest-only, host-only and host-guest coexistence all pass. - Failures. All above tests passed on Intel Granite Rapids as well except a failure on KUT/pmu_pebs. * GP counter 0 (0xfffffffffffe): PEBS record (written seq 0) is verified (including size, counters and cfg). * The pebs_data_cfg (0xb500000000) doesn't match with the effective MSR_PEBS_DATA_CFG (0x0). * This failure has nothing to do with this mediated vPMU patch set. The failure is caused by Granite Rapids supporting timed PEBS which needs extra support on Qemu and KUT/pmu_pebs. 
This extra support will be sent in separate patches later. Testing (AMD side): - Kselftests pmu_counters_test, pmu_event_filter_test and vmx_pmu_caps_test all pass - legacy guest with KUT/pmu: * qemu option: -cpu host, -perfctr-core * when set force_emulation_prefix=1, passes * when set force_emulation_prefix=0, passes - perfmon-v1 guest with KUT/pmu: * qemu option: -cpu host, -perfmon-v2 * when set force_emulation_prefix=1, passes * when set force_emulation_prefix=0, passes - perfmon-v2 guest with KUT/pmu: * qemu option: -cpu host * when set force_emulation_prefix=1, passes * when set force_emulation_prefix=0, passes - perf_fuzzer (perfmon-v2): * fails with soft lockup in guest in current version. * culprit could be between 6.13 ~ 6.14-rc3 within KVM * Series tested on 6.12 and 6.13 without issue. Note: a QEMU series is needed to run mediated vPMU v4: - https://lore.kernel.org/all/20250324123712.34096-1-dapeng1.mi@linux.intel.c… History: - RFC v3: https://lore.kernel.org/all/20240801045907.4010984-1-mizhang@google.com/ - RFC v2: https://lore.kernel.org/all/20240506053020.3911940-1-mizhang@google.com/ - RFC v1: https://lore.kernel.org/all/20240126085444.324918-1-xiong.y.zhang@linux.int… Dapeng Mi (18): KVM: x86/pmu: Introduce enable_mediated_pmu global parameter KVM: x86/pmu: Check PMU cpuid configuration from user space KVM: x86: Rename vmx_vmentry/vmexit_ctrl() helpers KVM: x86/pmu: Add perf_capabilities field in struct kvm_host_values{} KVM: x86/pmu: Move PMU_CAP_{FW_WRITES,LBR_FMT} into msr-index.h header KVM: VMX: Add macros to wrap around {secondary,tertiary}_exec_controls_changebit() KVM: x86/pmu: Check if mediated vPMU can intercept rdpmc KVM: x86/pmu/vmx: Save/load guest IA32_PERF_GLOBAL_CTRL with vm_exit/entry_ctrl KVM: x86/pmu: Optimize intel/amd_pmu_refresh() helpers KVM: x86/pmu: Setup PMU MSRs' interception mode KVM: x86/pmu: Handle PMU MSRs interception and event filtering KVM: x86/pmu: Switch host/guest PMU context at vm-exit/vm-entry KVM: 
x86/pmu: Handle emulated instruction for mediated vPMU KVM: nVMX: Add macros to simplify nested MSR interception setting KVM: selftests: Add mediated vPMU supported for pmu tests KVM: Selftests: Support mediated vPMU for vmx_pmu_caps_test KVM: Selftests: Fix pmu_counters_test error for mediated vPMU KVM: x86/pmu: Expose enable_mediated_pmu parameter to user space Kan Liang (8): perf: Support get/put mediated PMU interfaces perf: Skip pmu_ctx based on event_type perf: Clean up perf ctx time perf: Add a EVENT_GUEST flag perf: Add generic exclude_guest support perf: Add switch_guest_ctx() interface perf/x86: Support switch_guest_ctx interface perf/x86/intel: Support PERF_PMU_CAP_MEDIATED_VPMU Mingwei Zhang (5): perf/x86: Forbid PMI handler when guest own PMU perf/x86/core: Plumb mediated PMU capability from x86_pmu to x86_pmu_cap KVM: x86/pmu: Exclude PMU MSRs in vmx_get_passthrough_msr_slot() KVM: x86/pmu: introduce eventsel_hw to prepare for pmu event filtering KVM: nVMX: Add nested virtualization support for mediated PMU Sandipan Das (4): perf/x86/core: Do not set bit width for unavailable counters KVM: x86/pmu: Add AMD PMU registers to direct access list KVM: x86/pmu/svm: Set GuestOnly bit and clear HostOnly bit when guest write to event selectors perf/x86/amd: Support PERF_PMU_CAP_MEDIATED_VPMU for AMD host Xiong Zhang (3): x86/irq: Factor out common code for installing kvm irq handler perf: core/x86: Register a new vector for KVM GUEST PMI KVM: x86/pmu: Register KVM_GUEST_PMI_VECTOR handler arch/x86/events/amd/core.c | 2 + arch/x86/events/core.c | 40 +- arch/x86/events/intel/core.c | 5 + arch/x86/include/asm/hardirq.h | 1 + arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/irq.h | 2 +- arch/x86/include/asm/irq_vectors.h | 5 +- arch/x86/include/asm/kvm-x86-pmu-ops.h | 2 + arch/x86/include/asm/kvm_host.h | 10 + arch/x86/include/asm/msr-index.h | 18 +- arch/x86/include/asm/perf_event.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kernel/idt.c | 1 + 
arch/x86/kernel/irq.c | 39 +- arch/x86/kvm/cpuid.c | 15 + arch/x86/kvm/pmu.c | 254 ++++++++- arch/x86/kvm/pmu.h | 45 ++ arch/x86/kvm/svm/pmu.c | 148 ++++- arch/x86/kvm/svm/svm.c | 26 + arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/vmx/capabilities.h | 11 +- arch/x86/kvm/vmx/nested.c | 68 ++- arch/x86/kvm/vmx/pmu_intel.c | 224 ++++++-- arch/x86/kvm/vmx/vmx.c | 89 +-- arch/x86/kvm/vmx/vmx.h | 11 +- arch/x86/kvm/x86.c | 63 ++- arch/x86/kvm/x86.h | 2 + include/linux/perf_event.h | 47 +- kernel/events/core.c | 519 ++++++++++++++---- .../beauty/arch/x86/include/asm/irq_vectors.h | 5 +- .../selftests/kvm/include/kvm_test_harness.h | 13 + .../testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/include/x86/processor.h | 8 + tools/testing/selftests/kvm/lib/kvm_util.c | 23 + .../selftests/kvm/x86/pmu_counters_test.c | 24 +- .../selftests/kvm/x86/pmu_event_filter_test.c | 8 +- .../selftests/kvm/x86/vmx_pmu_caps_test.c | 2 +- 37 files changed, 1480 insertions(+), 258 deletions(-) base-commit: 0ad2507d5d93f39619fc42372c347d6006b64319 -- 2.49.0.395.g12beb8f557-goog

1 month, 2 weeks

8
126
0 0

[PATCH] selftests: breakpoints: use suspend_stats to reliably check suspend success

by Moon Hee Lee

The step_after_suspend_test verifies that the system successfully suspended and resumed by setting a timerfd and checking whether the timer fully expired. However, this method is unreliable due to timing races. In practice, the system may take time to enter suspend, during which the timer may expire just before or during the transition. As a result, the remaining time after resume may show non-zero nanoseconds, even if suspend/resume completed successfully. This leads to false test failures. Replace the timer-based check with a read from /sys/power/suspend_stats/success. This counter is incremented only after a full suspend/resume cycle, providing a reliable and race-free indicator. Also remove the unused file descriptor for /sys/power/state, which remained after switching to a system() call to trigger suspend [1]. [1] https://lore.kernel.org/all/20240930224025.2858767-1-yifei.l.liu@oracle.com/ Fixes: c66be905cda2 ("selftests: breakpoints: use remaining time to check if suspend succeed") Signed-off-by: Moon Hee Lee <moonhee.lee.ca(a)gmail.com> --- .../breakpoints/step_after_suspend_test.c | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 8d275f03e977..8d233ac95696 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -127,22 +127,42 @@ int run_test(int cpu) return KSFT_PASS; } +/* + * Reads the suspend success count from sysfs. + * Returns the count on success or exits on failure. 
+ */ +static int get_suspend_success_count_or_fail(void) +{ + FILE *fp; + int val; + + fp = fopen("/sys/power/suspend_stats/success", "r"); + if (!fp) + ksft_exit_fail_msg( + "Failed to open suspend_stats/success: %s\n", + strerror(errno)); + + if (fscanf(fp, "%d", &val) != 1) { + fclose(fp); + ksft_exit_fail_msg( + "Failed to read suspend success count\n"); + } + + fclose(fp); + return val; +} + void suspend(void) { - int power_state_fd; int timerfd; int err; + int count_before; + int count_after; struct itimerspec spec = {}; if (getuid() != 0) ksft_exit_skip("Please run the test as root - Exiting.\n"); - power_state_fd = open("/sys/power/state", O_RDWR); - if (power_state_fd < 0) - ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed %s)\n", - strerror(errno)); - timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) ksft_exit_fail_msg("timerfd_create() failed\n"); @@ -152,14 +172,15 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); + count_before = get_suspend_success_count_or_fail(); + system("(echo mem > /sys/power/state) 2> /dev/null"); - timerfd_gettime(timerfd, &spec); - if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) + count_after = get_suspend_success_count_or_fail(); + if (count_after <= count_before) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); - close(power_state_fd); } int main(int argc, char **argv) -- 2.43.0

1 month, 3 weeks

3
3
0 0

[PATCH v2 0/6] VMM can handle guest SEA via KVM_EXIT_ARM_SEA

by Jiaqi Yan

Problem ======= When host APEI is unable to claim synchronous external abort (SEA) during stage-2 guest abort, today KVM directly injects an async SError into the VCPU then resumes it. The injected SError usually results in unpleasant guest kernel panic. One of the major situations of guest SEA is when a VCPU consumes recoverable uncorrected memory error (UER), which is not uncommon at all in modern datacenter servers with large amounts of physical memory. Although SError and guest panic is sufficient to stop the propagation of corrupted memory there is room to recover from an UER in a more graceful manner. Proposed Solution ================= Alternatively KVM can replay the SEA to the faulting VCPU, via existing KVM_SET_VCPU_EVENTS API. If the memory poison consumption or the fault that cause SEA is not from guest kernel, the blast radius can be limited to the consuming or faulting guest userspace process, so the VM can keep running. In addition, instead of doing under the hood without involving userspace, there are benefits to redirect the SEA to VMM: - VM customers care about the disruptions caused by memory errors, and VMM usually has the responsibility to start the process of notifying the customers of memory error events in their VMs. For example some cloud provider emits a critical log in their observability UI [1], and provides playbook for customers on how to mitigate disruptions to their workloads. - VMM can protect future memory error consumption by unmapping the poisoned pages from stage-2 page table with KVM userfault, or by splitting the memslot that contains the poisoned guest pages [2]. - VMM can keep track of SEA events in the VM. When VMM thinks the status on the host or the VM is bad enough, e.g. number of distinct SEAs exceeds a threshold, it can restart the VM on another healthy host. - Behavior parity with x86 architecture. 
When machine check exception (MCE) is caused by VCPU, kernel or KVM signals userspace SIGBUS to let VMM either recover from the MCE, or terminate itself with VM. The prior RFC proposes to implement SIGBUS on arm64 as well, but Marc preferred VCPU exit over signal [3]. However, implementation aside, returning SEA to VMM is on par with returning MCE to VMM. Once SEA is redirected to VMM, among other actions, VMM is encouraged to inject external aborts into the faulting VCPU, which is already supported by KVM on arm64. We notice injecting instruction abort is not fully supported by KVM_SET_VCPU_EVENTS. Complement it in the patchset. New UAPIs ========= This patchset introduces following userspace-visible changes to empower VMM to control what happens next for SEA on guest memory: - KVM_CAP_ARM_SEA_TO_USER. While taking SEA, if userspace has enabled this new capability at VM creation, and the SEA is not caused by memory allocated for stage-2 translation table, instead of injecting SError, return KVM_EXIT_ARM_SEA to userspace. - KVM_EXIT_ARM_SEA. This is the VM exit reason VMM gets. The details about the SEA are provided in arm_sea as much as possible, including sanitized ESR value at EL2, if guest virtual and physical addresses (GPA and GVA) are available and the values if available. - KVM_CAP_ARM_INJECT_EXT_IABT. VMM today can inject external data abort to VCPU via KVM_SET_VCPU_EVENTS API. However, in case of instruction abort, VMM cannot inject it via KVM_SET_VCPU_EVENTS. KVM_CAP_ARM_INJECT_EXT_IABT is just a natural extension of KVM_CAP_ARM_INJECT_EXT_DABT that tells VMM KVM_SET_VCPU_EVENTS now supports external instruction abort. * From v1 [4]: - Rebased on commit 4d62121ce9b5 ("KVM: arm64: vgic-debug: Avoid dereferencing NULL ITE pointer"). - Sanitize ESR_EL2 before reporting it to userspace. - Do not do KVM_EXIT_ARM_SEA when SEA is caused by memory allocated to stage-2 translation table. 
[1] https://cloud.google.com/solutions/sap/docs/manage-host-errors [2] https://lore.kernel.org/kvm/20250109204929.1106563-1-jthoughton@google.com [3] https://lore.kernel.org/kvm/86pljbqqh0.wl-maz@kernel.org [4] https://lore.kernel.org/kvm/20250505161412.1926643-1-jiaqiyan@google.com Jiaqi Yan (5): KVM: arm64: VM exit to userspace to handle SEA KVM: arm64: Set FnV for VCPU when FAR_EL2 is invalid KVM: selftests: Test for KVM_EXIT_ARM_SEA and KVM_CAP_ARM_SEA_TO_USER KVM: selftests: Test for KVM_CAP_INJECT_EXT_IABT Documentation: kvm: new uAPI for handling SEA Raghavendra Rao Ananta (1): KVM: arm64: Allow userspace to inject external instruction aborts Documentation/virt/kvm/api.rst | 128 ++++++- arch/arm64/include/asm/kvm_emulate.h | 67 ++++ arch/arm64/include/asm/kvm_host.h | 8 + arch/arm64/include/asm/kvm_ras.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 3 +- arch/arm64/kvm/arm.c | 6 + arch/arm64/kvm/guest.c | 13 +- arch/arm64/kvm/inject_fault.c | 3 + arch/arm64/kvm/mmu.c | 59 ++- include/uapi/linux/kvm.h | 12 + tools/arch/arm64/include/asm/esr.h | 2 + tools/arch/arm64/include/uapi/asm/kvm.h | 3 +- tools/testing/selftests/kvm/Makefile.kvm | 2 + .../testing/selftests/kvm/arm64/inject_iabt.c | 98 +++++ .../testing/selftests/kvm/arm64/sea_to_user.c | 340 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 1 + 16 files changed, 718 insertions(+), 29 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/inject_iabt.c create mode 100644 tools/testing/selftests/kvm/arm64/sea_to_user.c -- 2.49.0.1266.g31b7d2e469-goog

1 month, 3 weeks

2
20
0 0

[PATCH v3 00/15] Consolidate iommu page table implementations (AMD)

by Jason Gunthorpe

[All the precursor patches are merged now and AMD/RISCV/VTD conversions are written] Currently each of the iommu page table formats duplicates all of the logic to maintain the page table and perform map/unmap/etc operations. There are several different versions of the algorithms between all the different formats. The io-pgtable system provides an interface to help isolate the page table code from the iommu driver, but doesn't provide tools to implement the common algorithms. This makes it very hard to improve the state of the pagetable code under the iommu domains as any proposed improvement needs to alter a large number of different driver code paths. Combined with a lack of software based testing this makes improvement in this area very hard. iommufd wants several new page table operations: - More efficient map/unmap operations, using iommufd's batching logic - unmap that returns the physical addresses into a batch as it progresses - cut that allows splitting areas so large pages can have holes poked in them dynamically (ie guestmemfd hitless shared/private transitions) - More aggressive freeing of table memory to avoid waste - Fragmenting large pages so that dirty tracking can be more granular - Reassembling large pages so that VMs can run at full IO performance in migration/dirty tracking error flows - KHO integration for kernel live upgrade Together these are algorithmically complex enough to be a very significant task to go and implement in all the page table formats we support. Just the "server" focused drivers use almost all the formats (ARMv8 S1&S2 / x86 PAE / AMDv1 / VT-D SS / RISCV) Instead of doing the duplicated work, this series takes the first step to consolidate the algorithms into one place. In spirit it is similar to the work Christoph did a few years back to pull the redundant get_user_pages() implementations out of the arch code into core MM. This unlocked a great deal of improvement in that space in the following years. 
I would like to see the same benefit in iommu as well. My first RFC showed a bigger picture with almost all formats and more algorithms. This series reorganizes that to be narrowly focused on just enough to convert the AMD driver to use the new mechanism. kunit tests are provided that allow good testing of the algorithms and all formats on x86, nothing is arch specific. AMD is one of the simpler options as the HW is quite uniform with few different options/bugs while still requiring the complicated contiguous pages support. The HW also has a very simple range based invalidation approach that is easy to implement. The AMD v1 and AMD v2 page table formats are implemented bit for bit identical to the current code, tested using a compare kunit test that checks against the io-pgtable version (on github, see below). Updating the AMD driver to replace the io-pgtable layer with the new stuff is fairly straightforward now. The layering is fixed up in the new version so that all the invalidation goes through function pointers. Several small fixing patches have come out of this as I've been fixing the problems that the test suite uncovers in the current code, and implementing the fixed version in iommupt. On performance, there is a quite wide variety of implementation designs across all the drivers. 
Looking at some key performance across the main formats: iommu_map(): pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 53,66 , 51,63 , 19.19 (AMDV1) 256*2^12, 386,1909 , 367,1795 , 79.79 256*2^21, 362,1633 , 355,1556 , 77.77 2^12, 56,62 , 52,59 , 11.11 (AMDv2) 256*2^12, 405,1355 , 357,1292 , 72.72 256*2^21, 393,1160 , 358,1114 , 67.67 2^12, 55,65 , 53,62 , 14.14 (VTD second stage) 256*2^12, 391,518 , 332,512 , 35.35 256*2^21, 383,635 , 336,624 , 46.46 2^12, 57,65 , 55,63 , 12.12 (ARM 64 bit) 256*2^12, 380,389 , 361,369 , 2.02 256*2^21, 358,419 , 345,400 , 13.13 iommu_unmap(): pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 69,88 , 65,85 , 23.23 (AMDv1) 256*2^12, 353,6498 , 331,6029 , 94.94 256*2^21, 373,6014 , 360,5706 , 93.93 2^12, 71,72 , 66,69 , 4.04 (AMDv2) 256*2^12, 228,891 , 206,871 , 76.76 256*2^21, 254,721 , 245,711 , 65.65 2^12, 69,87 , 65,82 , 20.20 (VTD second stage) 256*2^12, 210,321 , 200,315 , 36.36 256*2^21, 255,349 , 238,342 , 30.30 2^12, 72,77 , 68,74 , 8.08 (ARM 64 bit) 256*2^12, 521,357 , 447,346 , -29.29 256*2^21, 489,358 , 433,345 , -25.25 * Above numbers include additional patches to remove the iommu_pgsize() overheads. gcc 13.3.0, i7-12700 This version provides fairly consistent performance across formats. ARM unmap performance is quite different because this version supports contiguous pages and uses a very different algorithm for unmapping. Though why it is so worse compared to AMDv1 I haven't figured out yet. The per-format commits include a more detailed chart. 
There is a second branch: https://github.com/jgunthorpe/linux/commits/iommu_pt_all Containing supporting work and future steps: - ARM short descriptor (32 bit), ARM long descriptor (64 bit) formats - RISCV format and RISCV conversion https://github.com/jgunthorpe/linux/commits/iommu_pt_riscv - Support for a DMA incoherent HW page table walker - VT-D second stage format and VT-D conversion https://github.com/jgunthorpe/linux/commits/iommu_pt_vtd - DART v1 & v2 format - Draft of an iommufd 'cut' operation to break down huge pages - A compare test that checks the iommupt formats against the iopgtable interface, including updating AMD to have a working iopgtable and patches to make VT-D have an iopgtable for testing. - A performance test to micro-benchmark map and unmap against iopgtable My strategy is to go one by one for the drivers: - AMD driver conversion - RISCV page table and driver - Intel VT-D driver and VTDSS page table - Flushing improvements for RISCV - ARM SMMUv3 And concurrently work on the algorithm side: - debugfs content dump, like VT-D has - Cut support - Increase/Decrease page size support - map/unmap batching - KHO As we make more algorithm improvements the value to convert the drivers increases. This is on github: https://github.com/jgunthorpe/linux/commits/iommu_pt v2: - Rebase on v6.16-rc2 - s/PT_ENTRY_WORD_SIZE/PT_ITEM_WORD_SIZE/s to follow the language better - Comment and documentation updates - Add PT_TOP_PHYS_MASK to help manage alignment restrictions on the top pointer - Add missed force_aperture = true - Make pt_iommu_deinit() take care of the not-yet-inited error case internally as AMD/RISCV/VTD all shared this logic - Change gather_range() into gather_range_pages() so it also deals with the page list. 
This makes the following cache flushing series simpler - Fix missed update of unmap->unmapped in some error cases - Change clear_contig() to order the gather more logically - Remove goto from the error handling in __map_range_leaf() - s/log2_/oalog2_/ in places where the argument is an oaddr_t - Pass the pts to pt_table_install64/32() - Do not use SIGN_EXTEND for the AMDv2 page table because of Vasant's information on how PASID 0 works. v1: https://patch.msgid.link/r/0-v2-5c26bde5c22d+58b-iommu_pt_jgg@nvidia.com - AMD driver only, many code changes RFC: https://lore.kernel.org/all/0-v1-01fa10580981+1d-iommu_pt_jgg@nvidia.com/ Alejandro Jimenez (1): iommu/amd: Use the generic iommu page table Jason Gunthorpe (14): genpt: Generic Page Table base API genpt: Add Documentation/ files iommupt: Add the basic structure of the iommu implementation iommupt: Add the AMD IOMMU v1 page table format iommupt: Add iova_to_phys op iommupt: Add unmap_pages op iommupt: Add map_pages op iommupt: Add read_and_clear_dirty op iommupt: Add a kunit test for Generic Page Table iommupt: Add a mock pagetable format for iommufd selftest to use iommufd: Change the selftest to use iommupt instead of xarray iommupt: Add the x86 64 bit page table format iommu/amd: Remove AMD io_pgtable support iommupt: Add a kunit test for the IOMMU implementation .clang-format | 1 + Documentation/driver-api/generic_pt.rst | 140 ++ Documentation/driver-api/index.rst | 1 + drivers/iommu/Kconfig | 2 + drivers/iommu/Makefile | 1 + drivers/iommu/amd/Kconfig | 5 +- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/amd_iommu_types.h | 109 +- drivers/iommu/amd/io_pgtable.c | 560 -------- drivers/iommu/amd/io_pgtable_v2.c | 370 ------ drivers/iommu/amd/iommu.c | 516 ++++---- drivers/iommu/generic_pt/.kunitconfig | 13 + drivers/iommu/generic_pt/Kconfig | 72 ++ drivers/iommu/generic_pt/fmt/Makefile | 26 + drivers/iommu/generic_pt/fmt/amdv1.h | 409 ++++++ 
drivers/iommu/generic_pt/fmt/defs_amdv1.h | 21 + drivers/iommu/generic_pt/fmt/defs_x86_64.h | 21 + drivers/iommu/generic_pt/fmt/iommu_amdv1.c | 15 + drivers/iommu/generic_pt/fmt/iommu_mock.c | 10 + drivers/iommu/generic_pt/fmt/iommu_template.h | 48 + drivers/iommu/generic_pt/fmt/iommu_x86_64.c | 11 + drivers/iommu/generic_pt/fmt/x86_64.h | 248 ++++ drivers/iommu/generic_pt/iommu_pt.h | 1150 +++++++++++++++++ drivers/iommu/generic_pt/kunit_generic_pt.h | 717 ++++++++++ drivers/iommu/generic_pt/kunit_iommu.h | 183 +++ drivers/iommu/generic_pt/kunit_iommu_pt.h | 451 +++++++ drivers/iommu/generic_pt/pt_common.h | 354 +++++ drivers/iommu/generic_pt/pt_defs.h | 323 +++++ drivers/iommu/generic_pt/pt_fmt_defaults.h | 193 +++ drivers/iommu/generic_pt/pt_iter.h | 640 +++++++++ drivers/iommu/generic_pt/pt_log2.h | 130 ++ drivers/iommu/io-pgtable.c | 4 - drivers/iommu/iommufd/Kconfig | 1 + drivers/iommu/iommufd/iommufd_test.h | 11 +- drivers/iommu/iommufd/selftest.c | 439 +++---- include/linux/generic_pt/common.h | 166 +++ include/linux/generic_pt/iommu.h | 270 ++++ include/linux/io-pgtable.h | 2 - tools/testing/selftests/iommu/iommufd.c | 60 +- tools/testing/selftests/iommu/iommufd_utils.h | 12 + 41 files changed, 6119 insertions(+), 1589 deletions(-) create mode 100644 Documentation/driver-api/generic_pt.rst delete mode 100644 drivers/iommu/amd/io_pgtable.c delete mode 100644 drivers/iommu/amd/io_pgtable_v2.c create mode 100644 drivers/iommu/generic_pt/.kunitconfig create mode 100644 drivers/iommu/generic_pt/Kconfig create mode 100644 drivers/iommu/generic_pt/fmt/Makefile create mode 100644 drivers/iommu/generic_pt/fmt/amdv1.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_amdv1.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_x86_64.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_amdv1.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_mock.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_template.h create mode 100644 
drivers/iommu/generic_pt/fmt/iommu_x86_64.c create mode 100644 drivers/iommu/generic_pt/fmt/x86_64.h create mode 100644 drivers/iommu/generic_pt/iommu_pt.h create mode 100644 drivers/iommu/generic_pt/kunit_generic_pt.h create mode 100644 drivers/iommu/generic_pt/kunit_iommu.h create mode 100644 drivers/iommu/generic_pt/kunit_iommu_pt.h create mode 100644 drivers/iommu/generic_pt/pt_common.h create mode 100644 drivers/iommu/generic_pt/pt_defs.h create mode 100644 drivers/iommu/generic_pt/pt_fmt_defaults.h create mode 100644 drivers/iommu/generic_pt/pt_iter.h create mode 100644 drivers/iommu/generic_pt/pt_log2.h create mode 100644 include/linux/generic_pt/common.h create mode 100644 include/linux/generic_pt/iommu.h base-commit: cd76b0248a38645a3e3f8ca4a48bffc591e9da19 -- 2.43.0

1 month, 3 weeks

4
24
0 0

[RFC PATCH v2 0/9] KVM: Enable Nested Virt selftests

by Ganapatrao Kulkarni

This patch series makes the selftest work with NV enabled. The guest code is run in vEL2 instead of EL1. We add a command line option to enable testing of NV. The NV tests are disabled by default. Modified around 12 selftests in this series. Changes since v1: - Updated NV helper functions as per comments [1]. - Modified existing test cases to run guest code in vEL2. [1] https://lkml.iu.edu/hypermail/linux/kernel/2502.0/07001.html Ganapatrao Kulkarni (9): KVM: arm64: nv: selftests: Add support to run guest code in vEL2. KVM: arm64: nv: selftests: Add simple test to run guest code in vEL2 KVM: arm64: nv: selftests: Enable hypervisor timer tests to run in vEL2 KVM: arm64: nv: selftests: enable aarch32_id_regs test to run in vEL2 KVM: arm64: nv: selftests: Enable vgic tests to run in vEL2 KVM: arm64: nv: selftests: Enable set_id_regs test to run in vEL2 KVM: arm64: nv: selftests: Enable test to run in vEL2 KVM: selftests: arm64: Extend kvm_page_table_test to run guest code in vEL2 KVM: arm64: nv: selftests: Enable page_fault_test test to run in vEL2 tools/testing/selftests/kvm/Makefile.kvm | 2 + tools/testing/selftests/kvm/arch_timer.c | 8 +- .../selftests/kvm/arm64/aarch32_id_regs.c | 34 ++++- .../testing/selftests/kvm/arm64/arch_timer.c | 118 +++++++++++++++--- .../selftests/kvm/arm64/nv_guest_hypervisor.c | 68 ++++++++++ .../selftests/kvm/arm64/page_fault_test.c | 35 +++++- .../testing/selftests/kvm/arm64/set_id_regs.c | 57 ++++++++- tools/testing/selftests/kvm/arm64/vgic_init.c | 54 +++++++- tools/testing/selftests/kvm/arm64/vgic_irq.c | 27 ++-- .../selftests/kvm/arm64/vgic_lpi_stress.c | 19 ++- .../testing/selftests/kvm/guest_print_test.c | 32 +++++ .../selftests/kvm/include/arm64/arch_timer.h | 16 +++ .../kvm/include/arm64/kvm_util_arch.h | 3 + .../selftests/kvm/include/arm64/nv_util.h | 45 +++++++ .../selftests/kvm/include/arm64/vgic.h | 1 + .../testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/include/timer_test.h | 1 + 
.../selftests/kvm/kvm_page_table_test.c | 30 ++++- tools/testing/selftests/kvm/lib/arm64/nv.c | 46 +++++++ .../selftests/kvm/lib/arm64/processor.c | 61 ++++++--- tools/testing/selftests/kvm/lib/arm64/vgic.c | 8 ++ 21 files changed, 604 insertions(+), 64 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/nv_guest_hypervisor.c create mode 100644 tools/testing/selftests/kvm/include/arm64/nv_util.h create mode 100644 tools/testing/selftests/kvm/lib/arm64/nv.c -- 2.48.1

1 month, 4 weeks

6
28
0 0

[PATCH v2 0/5] KVM: Improve VMware guest support

by Zack Rusin

This is the second version of a series that lets us run VMware Workstation on Linux on top of KVM. The most significant change in this series is the introduction of CONFIG_KVM_VMWARE which is, in general, a nice cleanup for various bits of VMware compatibility code that have been scattered around KVM. (first patch) The rest of the series builds upon the VMware platform to implement features that are needed to run VMware guests without any modifications on top of KVM: - ability to turn on the VMware backdoor at runtime on a per-vm basis (used to be a kernel boot argument only) - support for VMware hypercalls - VMware products have a huge collection of hypercalls, all of which are handled in userspace, - support for handling legacy VMware backdoor in L0 in nested configs - in cases where we have WS running a Windows VBS guest, the L0 would be KVM, L1 Hyper-V so by default VMware Tools backdoor calls end up in Hyper-V which cannot handle them, so introduce a cap to let L0 handle those. The final change in the series is a kselftest of the VMware hypercall functionality. Cc: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Jonathan Corbet <corbet(a)lwn.net> Cc: Sean Christopherson <seanjc(a)google.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: x86(a)kernel.org Cc: "H. 
Peter Anvin" <hpa(a)zytor.com> Cc: Zack Rusin <zack.rusin(a)broadcom.com> Cc: Doug Covelli <doug.covelli(a)broadcom.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Namhyung Kim <namhyung(a)kernel.org> Cc: Arnaldo Carvalho de Melo <acme(a)redhat.com> Cc: Michael Ellerman <mpe(a)ellerman.id.au> Cc: Joel Stanley <joel(a)jms.id.au> Cc: Isaku Yamahata <isaku.yamahata(a)intel.com> Cc: kvm(a)vger.kernel.org Cc: linux-doc(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Zack Rusin (5): KVM: x86: Centralize KVM's VMware code KVM: x86: Allow enabling of the vmware backdoor via a cap KVM: x86: Add support for VMware guest specific hypercalls KVM: x86: Add support for legacy VMware backdoors in nested setups KVM: selftests: x86: Add a test for KVM_CAP_X86_VMWARE_HYPERCALL Documentation/virt/kvm/api.rst | 86 +++++++- MAINTAINERS | 9 + arch/x86/include/asm/kvm_host.h | 13 ++ arch/x86/kvm/Kconfig | 16 ++ arch/x86/kvm/Makefile | 1 + arch/x86/kvm/emulate.c | 11 +- arch/x86/kvm/kvm_vmware.c | 85 ++++++++ arch/x86/kvm/kvm_vmware.h | 189 ++++++++++++++++++ arch/x86/kvm/pmu.c | 39 +--- arch/x86/kvm/pmu.h | 4 - arch/x86/kvm/svm/nested.c | 6 + arch/x86/kvm/svm/svm.c | 10 +- arch/x86/kvm/vmx/nested.c | 6 + arch/x86/kvm/vmx/vmx.c | 5 +- arch/x86/kvm/x86.c | 74 +++---- arch/x86/kvm/x86.h | 2 - include/uapi/linux/kvm.h | 27 +++ tools/include/uapi/linux/kvm.h | 3 + tools/testing/selftests/kvm/Makefile.kvm | 1 + .../selftests/kvm/x86/vmware_hypercall_test.c | 121 +++++++++++ 20 files changed, 614 insertions(+), 94 deletions(-) create mode 100644 arch/x86/kvm/kvm_vmware.c create mode 100644 arch/x86/kvm/kvm_vmware.h create mode 100644 tools/testing/selftests/kvm/x86/vmware_hypercall_test.c -- 2.48.1

2 months

2
2
0 0

[PATCH/RFC] kunit/rtc: Add real support for very slow tests

by Geert Uytterhoeven

When running rtc_lib_test ("lib_test" before my "[PATCH] rtc: Rename lib_test to rtc_lib_test") on m68k/ARAnyM: KTAP version 1 1..1 KTAP version 1 # Subtest: rtc_lib_test_cases # module: rtc_lib_test 1..2 # rtc_time64_to_tm_test_date_range_1000: Test should be marked slow (runtime: 3.222371420s) ok 1 rtc_time64_to_tm_test_date_range_1000 # rtc_time64_to_tm_test_date_range_160000: try timed out # rtc_time64_to_tm_test_date_range_160000: test case timed out # rtc_time64_to_tm_test_date_range_160000.speed: slow not ok 2 rtc_time64_to_tm_test_date_range_160000 # rtc_lib_test_cases: pass:1 fail:1 skip:0 total:2 # Totals: pass:1 fail:1 skip:0 total:2 not ok 1 rtc_lib_test_cases Commit 02c2d0c2a84172c3 ("kunit: Add speed attribute") added the notion of "very slow" tests, but this is otherwise unused and unhandled. Hence: 1. Introduce KUNIT_CASE_VERY_SLOW(), 2. Increase the timeout by a factor of ten; ideally this should only be done for very slow tests, but I couldn't find how to access kunit_case.attr.case from kunit_try_catch_run(), 3. Mark rtc_time64_to_tm_test_date_range_1000 slow, 4. Mark rtc_time64_to_tm_test_date_range_160000 very slow. 
Afterwards: KTAP version 1 1..1 KTAP version 1 # Subtest: rtc_lib_test_cases # module: rtc_lib_test 1..2 # rtc_time64_to_tm_test_date_range_1000.speed: slow ok 1 rtc_time64_to_tm_test_date_range_1000 # rtc_time64_to_tm_test_date_range_160000.speed: very_slow ok 2 rtc_time64_to_tm_test_date_range_160000 # rtc_lib_test_cases: pass:2 fail:0 skip:0 total:2 # Totals: pass:2 fail:0 skip:0 total:2 ok 1 rtc_lib_test_cases Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org> --- drivers/rtc/rtc_lib_test.c | 4 ++-- include/kunit/test.h | 11 +++++++++++ lib/kunit/try-catch.c | 3 ++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc_lib_test.c b/drivers/rtc/rtc_lib_test.c index c30c759662e39b48..fd3210e39d37dbc6 100644 --- a/drivers/rtc/rtc_lib_test.c +++ b/drivers/rtc/rtc_lib_test.c @@ -85,8 +85,8 @@ static void rtc_time64_to_tm_test_date_range_1000(struct kunit *test) } static struct kunit_case rtc_lib_test_cases[] = { - KUNIT_CASE(rtc_time64_to_tm_test_date_range_1000), - KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_160000), + KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_1000), + KUNIT_CASE_VERY_SLOW(rtc_time64_to_tm_test_date_range_160000), {} }; diff --git a/include/kunit/test.h b/include/kunit/test.h index 9b773406e01f3c43..4e3c1cae5b41466e 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -183,6 +183,17 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status) { .run_case = test_name, .name = #test_name, \ .attr.speed = KUNIT_SPEED_SLOW, .module_name = KBUILD_MODNAME} +/** + * KUNIT_CASE_VERY_SLOW - A helper for creating a &struct kunit_case + * with the very slow attribute + * + * @test_name: a reference to a test case function. 
+ */ + +#define KUNIT_CASE_VERY_SLOW(test_name) \ + { .run_case = test_name, .name = #test_name, \ + .attr.speed = KUNIT_SPEED_VERY_SLOW, .module_name = KBUILD_MODNAME} + /** * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case * diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index 6bbe0025b0790bd2..92099c67bb21d0a4 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -56,7 +56,8 @@ static unsigned long kunit_test_timeout(void) * If tests timeout due to exceeding sysctl_hung_task_timeout_secs, * the task will be killed and an oops generated. */ - return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ + // FIXME times ten for KUNIT_SPEED_VERY_SLOW? + return 10 * 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ } void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) -- 2.43.0

2 months

3
3
0 0

[RESEND PATCH] selftests/pidfd: align stack to fix SP alignment exception

by Shuai Xue

The pidfd_test fails on the ARM64 platform with the following error: Bail out! pidfd_poll check for premature notification on child thread exec test: Failed When exception-trace is enabled, the kernel logs the details: #echo 1 > /proc/sys/debug/exception-trace #dmesg | tail -n 20 [48628.713023] pidfd_test[1082142]: unhandled exception: SP Alignment, ESR 0x000000009a000000, SP/PC alignment exception in pidfd_test[400000+4000] [48628.713049] CPU: 21 PID: 1082142 Comm: pidfd_test Kdump: loaded Tainted: G W E 6.6.71-3_rc1.al8.aarch64 #1 [48628.713051] Hardware name: AlibabaCloud AliServer-Xuanwu2.0AM-1UC1P-5B/AS1111MG1, BIOS 1.2.M1.AL.P.157.00 07/29/2023 [48628.713053] pstate: 60001800 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=-c) [48628.713055] pc : 0000000000402100 [48628.713056] lr : 0000ffff98288f9c [48628.713056] sp : 0000ffffde49daa8 [48628.713057] x29: 0000000000000000 x28: 0000000000000000 x27: 0000000000000000 [48628.713060] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [48628.713062] x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000400e80 [48628.713065] x20: 0000000000000000 x19: 0000000000402650 x18: 0000000000000000 [48628.713067] x17: 00000000004200d8 x16: 0000ffff98288f40 x15: 0000ffffde49b92c [48628.713070] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [48628.713072] x11: 0000000000001011 x10: 0000000000402100 x9 : 0000000000000010 [48628.713074] x8 : 00000000000000dc x7 : 3861616239346564 x6 : 000000000000000a [48628.713077] x5 : 0000ffffde49daa8 x4 : 000000000000000a x3 : 0000ffffde49daa8 [48628.713079] x2 : 0000ffffde49dadc x1 : 0000ffffde49daa8 x0 : 0000000000000000 According to ARM ARM D1.3.10.2 SP alignment checking: > When the SP is used as the base address of a calculation, regardless of > any offset applied by the instruction, if bits [3:0] of the SP are not > 0b0000, there is a misaligned SP. To fix it, align the stack to 16 bytes. 
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com> --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c081ae91313a..ec161a7c3ff9 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -33,7 +33,7 @@ static bool have_pidfd_send_signal; static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) { size_t stack_size = 1024; - char *stack[1024] = { 0 }; + char *stack[1024] __attribute__((aligned(16))) = {0}; #ifdef __ia64__ return __clone2(fn, stack, stack_size, flags | SIGCHLD, NULL, pidfd); -- 2.39.3

2 months

3
6
0 0

[PATCH v2 0/2] rust: minor idiomatic fixes to doctest generator

by Tamir Duberstein

Please see individual commit messages. Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v2: - rustfmt. - Alice's RB. - Add second patch to emit information in panic rather than separately to stderr. - Link to v1: https://lore.kernel.org/r/20250527-idiomatic-match-slice-v1-1-34b0b1d1d58c@… --- Tamir Duberstein (2): rust: replace length checks with match rust: emit path candidates in panic message scripts/rustdoc_test_gen.rs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) --- base-commit: 1ce98bb2bb30713ec4374ef11ead0d7d3e856766 change-id: 20250527-idiomatic-match-slice-26a79d100e4d Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

2 months

2
8
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror June 2025