July 2025 - Linux-kselftest-mirror

[PATCH] kho: add test for kexec handover

by Mike Rapoport

From: "Mike Rapoport (Microsoft)" <rppt(a)kernel.org> Testing kexec handover requires a kernel driver that will generate some data and preserve it with KHO on the first boot and then restore that data and verify it was preserved properly after kexec. To facilitate such test, along with the kernel driver responsible for data generation, preservation and restoration add a script that runs a kernel in a VM with a minimal /init. The /init enables KHO, loads a kernel image for kexec and runs kexec reboot. After the boot of the kexeced kernel, the driver verifies that the data was properly preserved. Signed-off-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org> --- MAINTAINERS | 1 + lib/Kconfig.debug | 21 ++ lib/Makefile | 1 + lib/test_kho.c | 305 +++++++++++++++++++++++++ tools/testing/selftests/kho/arm64.conf | 9 + tools/testing/selftests/kho/init.c | 100 ++++++++ tools/testing/selftests/kho/vmtest.sh | 183 +++++++++++++++ tools/testing/selftests/kho/x86.conf | 7 + 8 files changed, 627 insertions(+) create mode 100644 lib/test_kho.c create mode 100644 tools/testing/selftests/kho/arm64.conf create mode 100644 tools/testing/selftests/kho/init.c create mode 100755 tools/testing/selftests/kho/vmtest.sh create mode 100644 tools/testing/selftests/kho/x86.conf diff --git a/MAINTAINERS b/MAINTAINERS index 10850512c118..7eada657c5e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13356,6 +13356,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: kernel/kexec_handover.c +F: tools/testing/selftests/kho/ KEYS-ENCRYPTED M: Mimi Zohar <zohar(a)linux.ibm.com> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebe33181b6e6..4f82d38e3c45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3225,6 +3225,27 @@ config TEST_OBJPOOL If unsure, say N. +config TEST_KEXEC_HANDOVER + bool "Test for Kexec HandOver" + default n + depends on KEXEC_HANDOVER + help + This option enables test for Kexec HandOver (KHO). 
+ The test consists of two parts: saving kernel data before kexec and + restoring the data after kexec and verifying that it was properly + handed over. This test module creates and saves data on the boot of + the first kernel and restores and verifies the data on the boot of + kexec'ed kernel. + + For detailed documentation about KHO, see Documentation/core-api/kho. + + To run the test run: + + tools/testing/selftests/kho/vmtest.sh -h + + If unsure, say N. + + config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index c38582f187dd..6a8d00aac3a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..f5fe39c7c2b1 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar <ssengar(a)microsoft.com> + * Mike Rapoport <rppt(a)kernel.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/vmalloc.h> +#include <linux/kexec_handover.h> + +#include <net/checksum.h> + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! 
*/ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int 
err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if 
(err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int __init kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", 
KHO_TEST_FDT, err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport <rppt(a)kernel.org>"); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf new file mode 100644 index 000000000000..ee696807cd35 --- /dev/null +++ b/tools/testing/selftests/kho/arm64.conf @@ -0,0 +1,9 @@ +QEMU_CMD="qemu-system-aarch64 -M virt -cpu max" +QEMU_KCONFIG=" +CONFIG_SERIAL_AMBA_PL010=y +CONFIG_SERIAL_AMBA_PL010_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +" +KERNEL_IMAGE="Image" +KERNEL_CMDLINE="console=ttyAMA0" diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c new file mode 100644 index 000000000000..8034e24c6bf6 --- /dev/null +++ b/tools/testing/selftests/kho/init.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef NOLIBC +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <syscall.h> +#include <sys/mount.h> +#include <sys/reboot.h> +#endif + +/* from arch/x86/include/asm/setup.h */ +#define COMMAND_LINE_SIZE 2048 + +/* from include/linux/kexex.h */ +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + +#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KERNEL_IMAGE "/kernel" + +static int mount_filesystems(void) +{ + if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0) + return -1; + + return mount("proc", "/proc", "proc", 0, NULL); +} + +static int kho_enable(void) +{ + const char enable[] = "1"; + int fd; + + fd = open(KHO_FINILIZE, O_RDWR); + if (fd < 0) + return -1; + + if (write(fd, enable, 
sizeof(enable)) != sizeof(enable)) + return 1; + + close(fd); + return 0; +} + +static long kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline, + unsigned long flags) +{ + return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len, + cmdline, flags); +} + +static int kexec_load(void) +{ + char cmdline[COMMAND_LINE_SIZE]; + ssize_t len; + int fd, err; + + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, cmdline, sizeof(cmdline)); + close(fd); + if (len < 0) + return -1; + + /* replace \n with \0 */ + cmdline[len - 1] = 0; + fd = open(KERNEL_IMAGE, O_RDONLY); + if (fd < 0) + return -1; + + err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS); + close(fd); + + return err ? : 0; +} + +int main(int argc, char *argv[]) +{ + if (mount_filesystems()) + goto err_reboot; + + if (kho_enable()) + goto err_reboot; + + if (kexec_load()) + goto err_reboot; + + if (reboot(RB_KEXEC)) + goto err_reboot; + + return 0; + +err_reboot: + reboot(RB_AUTOBOOT); + return -1; +} diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh new file mode 100755 index 000000000000..ec70a17bd476 --- /dev/null +++ b/tools/testing/selftests/kho/vmtest.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -ue + +CROSS_COMPILE="${CROSS_COMPILE:-""}" + +test_dir=$(realpath "$(dirname "$0")") +kernel_dir=$(realpath "$test_dir/../../../..") + +tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) +headers_dir="$tmp_dir/usr" +initrd_dir="$tmp_dir/initrd" +initrd="$tmp_dir/initrd.cpio" + +source "$test_dir/../kselftest/ktap_helpers.sh" + +function usage() { + cat <<EOF +$0 [-d build_dir] [-j jobs] [-t target_arch] [-h] +Options: + -d) path to the kernel build directory + -j) number of jobs for compilation, similar to -j in make + -t) run test for target_arch, requires CROSS_COMPILE set + supported targets: aarch64, x86_64 + -h) display this help +EOF +} + 
+function cleanup() { + rm -fr "$tmp_dir" + ktap_finished +} +trap cleanup EXIT + +function skip() { + local msg=${1:-""} + + ktap_test_skip "$msg" + exit "$KSFT_SKIP" +} + +function fail() { + local msg=${1:-""} + + ktap_test_fail "$msg" + exit "$KSFT_FAIL" +} + +function build_kernel() { + local build_dir=$1 + local make_cmd=$2 + local arch_kconfig=$3 + local kimage=$4 + + local kho_config="$tmp_dir/kho.config" + local kconfig="$build_dir/.config" + + # enable initrd, KHO and KHO test in kernel configuration + tee "$kconfig" > "$kho_config" <<EOF +CONFIG_BLK_DEV_INITRD=y +CONFIG_KEXEC_HANDOVER=y +CONFIG_TEST_KEXEC_HANDOVER=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_VM=y +$arch_kconfig +EOF + + make_cmd="$make_cmd -C $kernel_dir O=$build_dir" + $make_cmd olddefconfig + + # verify that kernel confiration has all necessary options + while read -r opt ; do + grep "$opt" "$kconfig" &>/dev/null || skip "$opt is missing" + done < "$kho_config" + + $make_cmd "$kimage" + $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir" +} + +function mkinitrd() { + local kernel=$1 + + mkdir -p "$initrd_dir"/{dev,debugfs,proc} + sudo mknod "$initrd_dir/dev/console" c 5 1 + + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ + -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ + -include "$test_dir/../../../include/nolibc/nolibc.h" \ + -o "$initrd_dir/init" "$test_dir/init.c" \ + + cp "$kernel" "$initrd_dir/kernel" + + pushd "$initrd_dir" &>/dev/null + find . 
| cpio -H newc --create > "$initrd" 2>/dev/null + popd &>/dev/null +} + +function run_qemu() { + local qemu_cmd=$1 + local cmdline=$2 + local kernel=$3 + local serial="$tmp_dir/qemu.serial" + + cmdline="$cmdline kho=on panic=-1" + + $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \ + -accel kvm -accel hvf -accel tcg \ + -serial file:"$serial" \ + -append "$cmdline" \ + -kernel "$kernel" \ + -initrd "$initrd" + + grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed" +} + +function target_to_arch() { + local target=$1 + + case $target in + aarch64) echo "arm64" ;; + x86_64) echo "x86" ;; + *) skip "architecture $target is not supported" + esac +} + +function main() { + local build_dir="$kernel_dir/.kho" + local jobs=$(($(nproc) * 2)) + local target="$(uname -m)" + + # skip the test if any of the preparation steps fails + set -o errtrace + trap skip ERR + + while getopts 'hd:j:t:' opt; do + case $opt in + d) + build_dir="$OPTARG" + ;; + j) + jobs="$OPTARG" + ;; + t) + target="$OPTARG" + ;; + h) + usage + exit 0 + ;; + *) + echo Unknown argument "$opt" + usage + exit 1 + ;; + esac + done + + ktap_print_header + ktap_set_plan 1 + + if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then + skip "Cross-platform testing needs to specify CROSS_COMPILE" + fi + + mkdir -p "$build_dir" + local arch=$(target_to_arch "$target") + source "$test_dir/$arch.conf" + + # build the kernel and create initrd + # initrd includes the kernel image that will be kexec'ed + local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs" + build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE" + + local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE" + mkinitrd "$kernel" + + run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel" + + ktap_test_pass "KHO succeeded" +} + +main "$@" diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf new file mode 100644 index 000000000000..b419e610ca22 --- /dev/null 
+++ b/tools/testing/selftests/kho/x86.conf @@ -0,0 +1,7 @@ +QEMU_CMD=qemu-system-x86_64 +QEMU_KCONFIG=" +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +" +KERNEL_IMAGE="bzImage" +KERNEL_CMDLINE="console=ttyS0" base-commit: 89be9a83ccf1f88522317ce02f854f30d6115c41 -- 2.47.2

3 weeks, 3 days

2
1
0 0

[PATCH v1 0/4] A couple of improvements for VMM to inject external abort to guest

by Jiaqi Yan

There are several situations where VMM is involved when handling synchronous external instruction or data aborts, and often VMM needs to inject external aborts to guest. In addition to manipulating individual registers with KVM_SET_ONE_REG API, an easier way is to use the KVM_SET_VCPU_EVENTS API. This patchset adds two new features to the KVM_SET_VCPU_EVENTS API. 1. Extend KVM_SET_VCPU_EVENTS to support external instruction abort. 2. Allow userspace to emulate ESR_ELx.ISS by supplying ESR_ELx. In this way, we can also allow userspace to emulate ESR_ELx.ISS2 in future. The UAPI change for #1 is straightforward. However, I would appreciate some feedback on the ABI change for #2: struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; __u8 ext_dabt_pending; __u8 ext_iabt_pending; __u8 ext_abt_has_esr; __u8 pad[3]; __u64 serror_esr; __u64 ext_abt_esr; // <= +8 bytes } exception; __u32 reserved[10]; // <= -8 bytes }; The offset to kvm_vcpu_events.reserved changes, and the size of exception changes. I think we can't say userspace will never access reserved, or they will never use sizeof(exception). Theoretically this is an ABI break and I want to call it out and ask if a new ABI is needed for feature #2. For example, is it worthwhile to introduce exception_v2 or kvm_vcpu_events_v2?
Based on commit 7b8346bd9fce6 ("KVM: arm64: Don't attempt vLPI mappings when vPE allocation is disabled") Jiaqi Yan (3): KVM: arm64: Allow userspace to supply ESR when injecting SEA KVM: selftests: Test injecting external abort with ISS Documentation: kvm: update UAPI for injecting SEA Raghavendra Rao Ananta (1): KVM: arm64: Allow userspace to inject external instruction abort Documentation/virt/kvm/api.rst | 48 +++-- arch/arm64/include/asm/kvm_emulate.h | 9 +- arch/arm64/include/uapi/asm/kvm.h | 7 +- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/emulate-nested.c | 6 +- arch/arm64/kvm/guest.c | 42 ++-- arch/arm64/kvm/inject_fault.c | 16 +- include/uapi/linux/kvm.h | 1 + tools/arch/arm64/include/uapi/asm/kvm.h | 7 +- .../selftests/kvm/arm64/external_aborts.c | 191 +++++++++++++++--- .../testing/selftests/kvm/arm64/inject_iabt.c | 98 +++++++++ 11 files changed, 352 insertions(+), 74 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/inject_iabt.c -- 2.50.1.565.gc32cd1483b-goog

3 weeks, 3 days

1
4
0 0

[PATCH v2 0/6] VMM can handle guest SEA via KVM_EXIT_ARM_SEA

by Jiaqi Yan

Problem ======= When host APEI is unable to claim synchronous external abort (SEA) during stage-2 guest abort, today KVM directly injects an async SError into the VCPU then resumes it. The injected SError usually results in unpleasant guest kernel panic. One of the major situations of guest SEA is when a VCPU consumes a recoverable uncorrected memory error (UER), which is not uncommon at all in modern datacenter servers with large amounts of physical memory. Although SError and guest panic is sufficient to stop the propagation of corrupted memory, there is room to recover from a UER in a more graceful manner. Proposed Solution ================= Alternatively KVM can replay the SEA to the faulting VCPU, via existing KVM_SET_VCPU_EVENTS API. If the memory poison consumption or the fault that causes SEA is not from guest kernel, the blast radius can be limited to the consuming or faulting guest userspace process, so the VM can keep running. In addition, instead of doing this under the hood without involving userspace, there are benefits to redirect the SEA to VMM: - VM customers care about the disruptions caused by memory errors, and VMM usually has the responsibility to start the process of notifying the customers of memory error events in their VMs. For example some cloud provider emits a critical log in their observability UI [1], and provides playbook for customers on how to mitigate disruptions to their workloads. - VMM can protect future memory error consumption by unmapping the poisoned pages from stage-2 page table with KVM userfault, or by splitting the memslot that contains the poisoned guest pages [2]. - VMM can keep track of SEA events in the VM. When VMM thinks the status on the host or the VM is bad enough, e.g. number of distinct SEAs exceeds a threshold, it can restart the VM on another healthy host. - Behavior parity with x86 architecture.
When machine check exception (MCE) is caused by VCPU, kernel or KVM signals userspace SIGBUS to let VMM either recover from the MCE, or terminate itself with VM. The prior RFC proposes to implement SIGBUS on arm64 as well, but Marc preferred VCPU exit over signal [3]. However, implementation aside, returning SEA to VMM is on par with returning MCE to VMM. Once SEA is redirected to VMM, among other actions, VMM is encouraged to inject external aborts into the faulting VCPU, which is already supported by KVM on arm64. We notice injecting instruction abort is not fully supported by KVM_SET_VCPU_EVENTS. This patchset complements it. New UAPIs ========= This patchset introduces the following userspace-visible changes to empower VMM to control what happens next for SEA on guest memory: - KVM_CAP_ARM_SEA_TO_USER. While taking SEA, if userspace has enabled this new capability at VM creation, and the SEA is not caused by memory allocated for stage-2 translation table, instead of injecting SError, return KVM_EXIT_ARM_SEA to userspace. - KVM_EXIT_ARM_SEA. This is the VM exit reason VMM gets. The details about the SEA are provided in arm_sea as much as possible, including sanitized ESR value at EL2, if guest virtual and physical addresses (GVA and GPA) are available and the values if available. - KVM_CAP_ARM_INJECT_EXT_IABT. VMM today can inject external data abort to VCPU via KVM_SET_VCPU_EVENTS API. However, in case of instruction abort, VMM cannot inject it via KVM_SET_VCPU_EVENTS. KVM_CAP_ARM_INJECT_EXT_IABT is just a natural extension to KVM_CAP_ARM_INJECT_EXT_DABT that tells VMM KVM_SET_VCPU_EVENTS now supports external instruction abort. * From v1 [4]: - Rebased on commit 4d62121ce9b5 ("KVM: arm64: vgic-debug: Avoid dereferencing NULL ITE pointer"). - Sanitize ESR_EL2 before reporting it to userspace. - Do not do KVM_EXIT_ARM_SEA when SEA is caused by memory allocated to stage-2 translation table.
[1] https://cloud.google.com/solutions/sap/docs/manage-host-errors [2] https://lore.kernel.org/kvm/20250109204929.1106563-1-jthoughton@google.com [3] https://lore.kernel.org/kvm/86pljbqqh0.wl-maz@kernel.org [4] https://lore.kernel.org/kvm/20250505161412.1926643-1-jiaqiyan@google.com Jiaqi Yan (5): KVM: arm64: VM exit to userspace to handle SEA KVM: arm64: Set FnV for VCPU when FAR_EL2 is invalid KVM: selftests: Test for KVM_EXIT_ARM_SEA and KVM_CAP_ARM_SEA_TO_USER KVM: selftests: Test for KVM_CAP_INJECT_EXT_IABT Documentation: kvm: new uAPI for handling SEA Raghavendra Rao Ananta (1): KVM: arm64: Allow userspace to inject external instruction aborts Documentation/virt/kvm/api.rst | 128 ++++++- arch/arm64/include/asm/kvm_emulate.h | 67 ++++ arch/arm64/include/asm/kvm_host.h | 8 + arch/arm64/include/asm/kvm_ras.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 3 +- arch/arm64/kvm/arm.c | 6 + arch/arm64/kvm/guest.c | 13 +- arch/arm64/kvm/inject_fault.c | 3 + arch/arm64/kvm/mmu.c | 59 ++- include/uapi/linux/kvm.h | 12 + tools/arch/arm64/include/asm/esr.h | 2 + tools/arch/arm64/include/uapi/asm/kvm.h | 3 +- tools/testing/selftests/kvm/Makefile.kvm | 2 + .../testing/selftests/kvm/arm64/inject_iabt.c | 98 +++++ .../testing/selftests/kvm/arm64/sea_to_user.c | 340 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 1 + 16 files changed, 718 insertions(+), 29 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/inject_iabt.c create mode 100644 tools/testing/selftests/kvm/arm64/sea_to_user.c -- 2.49.0.1266.g31b7d2e469-goog

3 weeks, 3 days

2
20
0 0

[PATCH] kunit: tool: Accept --raw_output=full as an alias of 'all'

by David Gow

I can never remember whether --raw_output takes 'all' or 'full'. No reason we can't support both. For the record, 'all' is the recommended, documented option. Signed-off-by: David Gow <davidgow(a)google.com> --- tools/testing/kunit/kunit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 7f9ae55fd6d5..cd99c1956331 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -228,7 +228,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input fake_test.counts.passed = 1 output: Iterable[str] = input_data - if request.raw_output == 'all': + if request.raw_output == 'all' or request.raw_output == 'full': pass elif request.raw_output == 'kunit': output = kunit_parser.extract_tap_lines(output) @@ -425,7 +425,7 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' 'By default, filters to just KUnit output. Use ' '--raw_output=all to show everything', - type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) + type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit']) parser.add_argument('--json', nargs='?', help='Prints parsed test results as JSON to stdout or a file if ' -- 2.50.1.552.g942d659e1b-goog

3 weeks, 3 days

2
1
0 0

Re: [PATCH] selftests: timers: improve adjtick output readability

by Thomas Gleixner

Vishal! On Wed, Jul 30 2025 at 23:35, Vishal Parmar wrote: Please do not top-post and trim your replies. > The intent behind this change is to make output useful as is. > for example, to provide a performance report in case of regression. The point John was making: >> So it might be worth looking into getting the output to be happy with >> TAP while you're tweaking things here. The kernel selftests are converting over to standardized TAP output format, which is intended to aid automated testing. So if we change the output format of this test, then we switch it over to TAP format and do not invent yet another randomized output scheme. > CSV format is also a good alternative if the maintainer prefers that. The most important information is whether the test succeeded or not and CSV format is not helping either to conform with the test output standards. For the success case, the actual numbers are uninteresting. In the failure case it's sufficient to emit: ksft_test_result_fail("Req: NNNN, Exp: $MMMM, Res: $LLLL\n", ...); In case of regressions (fail), a report providing this output is good enough for the relevant maintainer/developer to start investigating. No? Thanks, tglx

3 weeks, 3 days

2
1
0 0

[PATCH] selftests: ALSA: fix memory leak in utimer test

by WangYuli

Free the malloc'd buffer in TEST_F(timer_f, utimer) to prevent memory leak. Reported-by: Jun Zhan <zhanjun(a)uniontech.com> Signed-off-by: WangYuli <wangyuli(a)uniontech.com> --- tools/testing/selftests/alsa/utimer-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c index 32ee3ce57721..37964f311a33 100644 --- a/tools/testing/selftests/alsa/utimer-test.c +++ b/tools/testing/selftests/alsa/utimer-test.c @@ -135,6 +135,7 @@ TEST_F(timer_f, utimer) { pthread_join(ticking_thread, NULL); ASSERT_EQ(total_ticks, TICKS_COUNT); pclose(rfp); + free(buf); } TEST(wrong_timers_test) { -- 2.50.1

3 weeks, 3 days

2
1
0 0

Crediting test authors

by Jakub Kicinski

Hi! Does anyone have ideas about crediting test authors or tests for bugs discovered? We increasingly see situations where someone adds a test, then our subsystem CI uncovers a (1 in 100 runs) bug using that test. Using reported-by doesn't feel right. But credit should go to the person who wrote the test. Is anyone else having this dilemma?

3 weeks, 3 days

5
9
0 0

[PATCH RFC v2 0/4] procfs: make reference pidns more user-visible

by Aleksa Sarai

Ever since the introduction of pid namespaces, procfs has had very implicit behaviour surrounding them (the pidns used by a procfs mount is auto-selected based on the mounting process's active pidns, and the pidns itself is basically hidden once the mount has been constructed). /* pidns mount option for procfs */ This implicit behaviour has historically meant that userspace was required to do some special dances in order to configure the pidns of a procfs mount as desired. Examples include: * In order to bypass the mnt_too_revealing() check, Kubernetes creates a procfs mount from an empty pidns so that user namespaced containers can be nested (without this, the nested containers would fail to mount procfs). But this requires forking off a helper process because you cannot just one-shot this using mount(2). * Container runtimes in general need to fork into a container before configuring its mounts, which can lead to security issues in the case of shared-pidns containers (a privileged process in the pidns can interact with your container runtime process). While SUID_DUMP_DISABLE and user namespaces make this less of an issue, the strict need for this due to a minor uAPI wart is kind of unfortunate. Things would be much easier if there was a way for userspace to just specify the pidns they want. Patch 1 implements a new "pidns" argument which can be set using fsconfig(2): fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd); fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0); or classic mount(2) / mount(8): // mount -t proc -o pidns=/proc/self/ns/pid proc /tmp/proc mount("proc", "/tmp/proc", "proc", MS_..., "pidns=/proc/self/ns/pid"); The initial security model I have in this RFC is to be as conservative as possible and just mirror the security model for setns(2) -- which means that you can only set pidns=... to pid namespaces that your current pid namespace is a direct ancestor of and you have CAP_SYS_ADMIN privileges over the pid namespace. 
This fulfils the requirements of container runtimes, but I suspect that this may be too strict for some usecases. The pidns argument is not displayed in mountinfo -- it's not clear to me what value it would make sense to show (maybe we could just use ns_dname to provide an identifier for the namespace, but this number would be fairly useless to userspace). I'm open to suggestions. Note that PROCFS_GET_PID_NAMESPACE (see below) does at least let userspace get information about this outside of mountinfo. /* ioctl(PROCFS_GET_PID_NAMESPACE) */ In addition, being able to figure out what pid namespace is being used by a procfs mount is quite useful when you have an administrative process (such as a container runtime) which wants to figure out the correct way of mapping PIDs between its own namespace and the namespace for procfs (using NS_GET_{PID,TGID}_{IN,FROM}_PIDNS). There are alternative ways to do this, but they all rely on ancillary information that third-party libraries and tools do not necessarily have access to. To make this easier, add a new ioctl (PROCFS_GET_PID_NAMESPACE) which can be used to get a reference to the pidns that a procfs is using. It's not quite clear what is the correct security model for this API, but the current approach I've taken is to: * Make the ioctl only valid on the root (meaning that a process without access to the procfs root -- such as only having an fd to a procfs file or some open_tree(2)-like subset -- cannot use this API). * Require that the process requesting either has access to /proc/1/ns/pid anyway (i.e. has ptrace-read access to the pidns pid1), has CAP_SYS_ADMIN access to the pidns (i.e. has administrative access to it and can join it if they had a handle), or is in a pidns that is a direct ancestor of the target pidns (i.e. all of the pids are already visible in the procfs for the current process's pidns). 
The security model for this is a little loose, as it seems to me that all of the cases mentioned are valid cases to allow access, but I'm open to suggestions for whether we need to make this stricter or looser. Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com> --- Changes in v2: - #ifdef CONFIG_PID_NS - Improve cover letter wording to make it clear we're talking about two separate features with different permission models. [Andy Lutomirski] - Fix build warnings in pidns_is_ancestor() patch. [kernel test robot] - v1: <https://lore.kernel.org/r/20250721-procfs-pidns-api-v1-0-5cd9007e512d@cypha…> --- Aleksa Sarai (4): pidns: move is-ancestor logic to helper procfs: add "pidns" mount option procfs: add PROCFS_GET_PID_NAMESPACE ioctl selftests/proc: add tests for new pidns APIs Documentation/filesystems/proc.rst | 10 ++ fs/proc/root.c | 144 ++++++++++++++- include/linux/pid_namespace.h | 9 + include/uapi/linux/fs.h | 3 + kernel/pid_namespace.c | 23 ++- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-pidns.c | 286 ++++++++++++++++++++++++++++++ 8 files changed, 461 insertions(+), 16 deletions(-) --- base-commit: 4c838c7672c39ec6ec48456c6ce22d14a68f4cda change-id: 20250717-procfs-pidns-api-8ed1583431f0 Best regards, -- Aleksa Sarai <cyphar(a)cyphar.com>

3 weeks, 3 days

2
12
0 0

[PATCH v2] selftests/bpf: Add missing kfunc declarations to fix build errors

by Jiawei Zhao

A number of BPF selftests that utilize kernel functions (kfuncs) fail to build due to missing function prototypes. This results in compilation errors, as implicit function declarations are treated as errors: error: call to undeclared function 'bpf_copy_from_user_task_str'; ISO C99 and later do not support implicit function declarations Unlike BPF helpers, kfuncs are not automatically available to BPF programs and must be explicitly declared before use. To resolve this, centralize all the necessary kfunc declarations into the `bpf_kfuncs.h` header file. This header is then included in all the test programs that were previously missing these declarations. This approach also allows for the removal of redundant local `extern` declarations from individual source files (e.g., in `irq.c`), leading to cleaner and more maintainable code. Change since v1: - Add a kfunc declaration for __bpf_trap in bpf_kfuncs.h Signed-off-by: Jiawei Zhao <phoenix500526(a)163.com> --- tools/testing/selftests/bpf/bpf_kfuncs.h | 65 +++++++++++++++++++ .../selftests/bpf/progs/bpf_iter_tasks.c | 1 + .../bpf/progs/bpf_qdisc_fail__incompl_ops.c | 1 + .../selftests/bpf/progs/bpf_qdisc_fifo.c | 1 + .../selftests/bpf/progs/bpf_qdisc_fq.c | 1 + .../selftests/bpf/progs/cgroup_read_xattr.c | 1 + .../testing/selftests/bpf/progs/dmabuf_iter.c | 1 + .../selftests/bpf/progs/dynptr_success.c | 1 + tools/testing/selftests/bpf/progs/irq.c | 6 +- .../selftests/bpf/progs/linked_list_peek.c | 1 + .../selftests/bpf/progs/rbtree_search.c | 1 + .../selftests/bpf/progs/rcu_read_lock.c | 1 + .../selftests/bpf/progs/read_cgroupfs_xattr.c | 1 + .../selftests/bpf/progs/res_spin_lock.c | 1 + .../selftests/bpf/progs/res_spin_lock_fail.c | 1 + .../struct_ops_refcounted_fail__tail_call.c | 1 + .../selftests/bpf/progs/test_spin_lock_fail.c | 1 + .../selftests/bpf/progs/verifier_bpf_trap.c | 1 + 18 files changed, 82 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h 
b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8215c9b3115e..a08c865b737a 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -2,6 +2,7 @@ #define __BPF_KFUNCS__ struct bpf_sock_addr_kern; +struct bpf_res_spin_lock; /* Description * Initializes an skb-type dynptr @@ -42,6 +43,28 @@ extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; +extern int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, __u32 dst_off, struct bpf_dynptr *src_ptr, + __u32 src_off, __u32 size) __ksym __weak; +extern int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign) __ksym __weak; +extern int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign, + struct task_struct *tsk) __ksym __weak; +extern int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, __u32 off, + __u32 size, const void *unsafe_ptr__ign, + struct task_struct *tsk) __ksym __weak; +extern int 
bpf_copy_from_user_task_str(void *dst, __u32, const void *, + struct task_struct *, __u64) __ksym __weak; /* Description * Modify the address of a AF_UNIX sockaddr. @@ -92,4 +115,46 @@ extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str, const struct bpf_dynptr *value_p, int flags) __ksym __weak; extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak; +extern void bpf_local_irq_save(unsigned long *) __ksym __weak; +extern void bpf_local_irq_restore(unsigned long *) __ksym __weak; +extern int bpf_copy_from_user_str(void *dst, __u32 dst__sz, + const void *unsafe_ptr__ign, __u64 flags) __ksym __weak; +extern int bpf_res_spin_lock_irqsave(struct bpf_res_spin_lock *lock, + unsigned long *flags__irq_flag) __ksym __weak; +extern void bpf_res_spin_unlock_irqrestore(struct bpf_res_spin_lock *lock, + unsigned long *flags__irq_flag) __ksym __weak; +extern int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) __ksym __weak; +extern void bpf_res_spin_unlock(struct bpf_res_spin_lock *lock) __ksym __weak; + +extern struct bpf_list_node *bpf_list_front(struct bpf_list_head *head) __ksym __weak; +extern struct bpf_list_node *bpf_list_back(struct bpf_list_head *head) __ksym __weak; + +struct bpf_sk_buff_ptr; +struct sk_buff; +struct Qdisc; + +extern void bpf_qdisc_skb_drop(struct sk_buff *skb, + struct bpf_sk_buff_ptr *to_free_list) __ksym __weak; +extern void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) __ksym __weak; +extern void bpf_kfree_skb(struct sk_buff *skb) __ksym __weak; +extern __u32 bpf_skb_get_hash(struct sk_buff *) __ksym __weak; +extern void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, __u64 expire, + __u64 delta_ns) __ksym __weak; + +extern struct cgroup *bpf_cgroup_from_id(__u64 cgid) __ksym __weak; +extern void bpf_cgroup_release(struct cgroup *cgrp) __ksym __weak; +extern void bpf_rcu_read_lock(void) __ksym __weak; +extern void bpf_rcu_read_unlock(void) __ksym __weak; 
+extern struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym __weak; + + +extern struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root) __ksym __weak; +extern struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, + struct bpf_rb_node *node) __ksym __weak; +extern struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, + struct bpf_rb_node *node) __ksym __weak; + +extern void bpf_task_release(struct task_struct *p) __ksym __weak; +extern void __bpf_trap(void) __ksym __weak; + #endif diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 966ee5a7b066..63daf05366df 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c index f188062ed730..7f1a5a1b5dac 100644 --- a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include "bpf_experimental.h" #include "bpf_qdisc_common.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c index 1de2be3e370b..9ae41518d578 100644 --- a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include "bpf_experimental.h" #include "bpf_qdisc_common.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c index 
1a3233a275c7..f86981bc2a09 100644 --- a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c @@ -37,6 +37,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_experimental.h" #include "bpf_qdisc_common.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c index 092db1d0435e..50162ca905cc 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -7,6 +7,7 @@ #include <bpf/bpf_core_read.h> #include "bpf_experimental.h" #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c index 13cdb11fdeb2..df0021dc54da 100644 --- a/tools/testing/selftests/bpf/progs/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" /* From uapi/linux/dma-buf.h */ #define DMA_BUF_NAME_LEN 32 diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a0391f9da2d4..95bcdf465c4b 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -8,6 +8,7 @@ #include <bpf/bpf_tracing.h> #include "bpf_misc.h" #include "errno.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c index 74d912b22de9..ce3b2509e6f1 100644 --- a/tools/testing/selftests/bpf/progs/irq.c +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -4,13 +4,9 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_kfuncs.h" unsigned long 
global_flags; - -extern void bpf_local_irq_save(unsigned long *) __weak __ksym; -extern void bpf_local_irq_restore(unsigned long *) __weak __ksym; -extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; - struct bpf_res_spin_lock lockA __hidden SEC(".data.A"); struct bpf_res_spin_lock lockB __hidden SEC(".data.B"); diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c index 264e81bfb287..00d5299eeb0a 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_peek.c +++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_kfuncs.h" struct node_data { struct bpf_list_node l; diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c index 098ef970fac1..681ea24d6877 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_search.c +++ b/tools/testing/selftests/bpf/progs/rbtree_search.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_kfuncs.h" struct node_data { struct bpf_refcount ref; diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 43637ee2cdcd..386559f026dd 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -6,6 +6,7 @@ #include <bpf/bpf_tracing.h> #include "bpf_tracing_net.h" #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c index 855f85fc5522..0575e08ae108 100644 --- a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -6,6 +6,7 @@ #include 
<bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_experimental.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c index 22c4fb8b9266..8d21b7ae0a18 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -4,6 +4,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" #define EDEADLK 35 #define ETIMEDOUT 110 diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c index 330682a88c16..d643ff783798 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c @@ -6,6 +6,7 @@ #include <bpf/bpf_core_read.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_kfuncs.h" struct arr_elem { struct bpf_res_spin_lock lock; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c index 3b125025a1f2..7661658848f4 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c @@ -4,6 +4,7 @@ #include <bpf/bpf_tracing.h> #include "../test_kmods/bpf_testmod.h" #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index f678ee6bd7ea..aee2791ad863 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -3,6 +3,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include "bpf_experimental.h" +#include "bpf_kfuncs.h" struct foo 
{ struct bpf_spin_lock lock; diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c index 35e2cdc00a01..9d89ab6f5c58 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" #if __clang_major__ >= 21 && 0 SEC("socket") -- 2.43.0

3 weeks, 3 days

1
0
0 0

[PATCH] tools/nolibc: fix error return value of clock_nanosleep()

by Thomas Weißschuh

clock_nanosleep() returns a positive error value. Unlike other libc functions it *does not* return -1 nor set errno. Fix the return value and also adapt nanosleep(). Fixes: 7c02bc4088af ("tools/nolibc: add support for clock_nanosleep() and nanosleep()") Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- tools/include/nolibc/time.h | 5 +++-- tools/testing/selftests/nolibc/nolibc-test.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d02bc44d2643a5e39afa808841f7175bfab5ff7e..e9c1b976791a65c0d73268bebbcfd4f2a57a47ee 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -133,7 +133,8 @@ static __attribute__((unused)) int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, struct timespec *rmtp) { - return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); } static __inline__ @@ -145,7 +146,7 @@ double difftime(time_t time1, time_t time2) static __inline__ int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) { - return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a297ee0d6d0754dfcd9f9e5609d42c7442dabc4e..cc4d730ac4656fb5944d50be9477a3dfefb00aa0 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1334,6 +1334,7 @@ int run_syscall(int min, int max) CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break; + CASE_TEST(clock_nanosleep); ts.tv_nsec = -1; EXPECT_EQ(1, EINVAL, 
clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL)); break; CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; --- base-commit: 260f6f4fda93c8485c8037865c941b42b9cba5d2 change-id: 20250731-nolibc-clock_nanosleep-ret-b03a299c083f Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

3 weeks, 3 days

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror July 2025