Add tests for the new charge_pid_fd field in struct dma_heap_allocation_data.
When the charge_pid_fd feature is absent (unpatched kernel), the probe in pidfd_alloc_supported() detects this and the tests are skipped gracefully.
Add vmtest.sh, similar to other subsystem suites, to orchestrate building the selftests (optionally together with a freshly compiled kernel) and running them inside a virtme-ng VM, so the tests can be run without modifying the host system. Add the required Kconfig symbol (CONFIG_MEMCG) to the existing dmabuf-heaps config fragment.
Also add test_memcg_dmabuf() to the existing test_memcontrol suite to verify end-to-end cross-cgroup accounting: a parent process opens a pidfd for a child in a separate cgroup, allocates a dma-buf via DMA_HEAP_IOCTL_ALLOC with that pidfd, and asserts that the dmabuf entry of memory.stat in the child's cgroup grows by the allocation size and drops back to zero once the buffer is closed. If the dmabuf key is missing (unpatched kernel) or /dev/dma_heap/system is absent or inaccessible, the test is skipped.
Assisted-by: Claude:claude-sonnet-4-6 Cursor
Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
 tools/testing/selftests/cgroup/Makefile            |   2 +-
 tools/testing/selftests/cgroup/test_memcontrol.c   | 143 +++++++++++++-
 tools/testing/selftests/dmabuf-heaps/config        |   1 +
 tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c | 126 ++++++++++++-
 tools/testing/selftests/dmabuf-heaps/vmtest.sh     | 205 +++++++++++++++++++++
 5 files changed, 473 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index e01584c2189ac..9edfc9f1de5c4 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wall -pthread +CFLAGS += -Wall -pthread $(KHDR_INCLUDES)
all: ${HELPER_PROGS}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index b43da9bc20c49..b6a228407530f 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -19,9 +19,17 @@ #include <errno.h> #include <sys/mman.h>
+#include <linux/dma-heap.h> +#include <signal.h> +#include <sys/ioctl.h> + +#include "../pidfd/pidfd.h" #include "kselftest.h" #include "cgroup_util.h"
+#define DMA_HEAP_SYSTEM "/dev/dma_heap/system" +#define ONE_MEG (1024 * 1024) + #define MEMCG_SOCKSTAT_WAIT_RETRIES 30
static bool has_localevents; @@ -1762,6 +1770,125 @@ static int test_memcg_inotify_delete_dir(const char *root) return ret; }
+static int memcg_dmabuf_child(const char *cgroup, void *arg)
+{
+	pause();	/* wait for a signal; test_memcg_dmabuf() sends SIGTERM */
+	return 0;
+}
+
+/*
+ * Allocate a dma-buf via DMA_HEAP_IOCTL_ALLOC, passing in charge_pid_fd a
+ * pidfd for a child running in its own cgroup. Pass if memory.stat "dmabuf"
+ * in the child's cgroup reaches the allocation size and drops back to zero
+ * once the buffer fd is closed. Skipped if heap, pidfd or stat key is missing.
+ */
+static int test_memcg_dmabuf(const char *root)
+{
+	char *parent = NULL, *child_cg = NULL;
+	int ret = KSFT_FAIL;
+	int heap_fd = -1, dmabuf_fd = -1, pidfd = -1;
+	pid_t child_pid;
+	int child_status;
+	long dmabuf_stat;
+	struct dma_heap_allocation_data alloc = {
+		.len = ONE_MEG,
+		.fd_flags = O_RDWR | O_CLOEXEC,
+	};
+
+	if (access(DMA_HEAP_SYSTEM, R_OK | W_OK)) {
+		ret = KSFT_SKIP;	/* system heap not accessible */
+		goto cleanup;
+	}
+
+	parent = cg_name(root, "dmabuf_memcg_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup_parent;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup_parent;
+
+	child_cg = cg_name(parent, "child");
+	if (!child_cg)
+		goto cleanup_parent;
+
+	if (cg_create(child_cg))
+		goto cleanup_parent;
+
+	child_pid = cg_run_nowait(child_cg, memcg_dmabuf_child, NULL);
+	if (child_pid < 0)
+		goto cleanup_child;
+
+	if (cg_wait_for_proc_count(child_cg, 1))
+		goto cleanup_kill;
+
+	pidfd = sys_pidfd_open(child_pid, 0);
+	if (pidfd < 0) {
+		ret = KSFT_SKIP;	/* could not get a pidfd for the child */
+		goto cleanup_kill;
+	}
+
+	heap_fd = open(DMA_HEAP_SYSTEM, O_RDWR);
+	if (heap_fd < 0) {
+		ret = KSFT_SKIP;
+		goto cleanup_pidfd;
+	}
+
+	alloc.charge_pid_fd = (__u32)pidfd;	/* pidfd of the child */
+	if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0)
+		goto cleanup_heap;	/* ret is still KSFT_FAIL */
+	dmabuf_fd = (int)alloc.fd;
+
+	dmabuf_stat = cg_read_key_long(child_cg, "memory.stat", "dmabuf ");
+	if (dmabuf_stat == -1) {	/* presumably: no "dmabuf" key (unpatched kernel) */
+		ret = KSFT_SKIP;
+		goto cleanup_dmabuf;
+	}
+	if (dmabuf_stat != ONE_MEG)	/* not there yet: retry via the polling helper */
+		dmabuf_stat = cg_read_key_long_poll(child_cg, "memory.stat",
+						    "dmabuf ", ONE_MEG,
+						    15, 200000);
+	if (dmabuf_stat != ONE_MEG) {
+		fprintf(stderr, "Expected dmabuf stat %d, got %ld\n",
+			ONE_MEG, dmabuf_stat);
+		goto cleanup_dmabuf;
+	}
+
+	close(dmabuf_fd);
+	dmabuf_fd = -1;
+
+	dmabuf_stat = cg_read_key_long_poll(child_cg, "memory.stat",
+					    "dmabuf ", 0, 15, 200000);
+	if (dmabuf_stat != 0) {
+		fprintf(stderr, "Expected dmabuf stat 0 after close, got %ld\n",
+			dmabuf_stat);
+		goto cleanup_heap;	/* dmabuf_fd was already closed above */
+	}
+
+	ret = KSFT_PASS;
+
+cleanup_dmabuf:
+	if (dmabuf_fd >= 0)
+		close(dmabuf_fd);
+cleanup_heap:
+	close(heap_fd);
+cleanup_pidfd:
+	close(pidfd);
+cleanup_kill:
+	kill(child_pid, SIGTERM);
+	waitpid(child_pid, &child_status, 0);
+cleanup_child:
+	cg_destroy(child_cg);
+	free(child_cg);
+cleanup_parent:
+	cg_destroy(parent);
+	free(parent);
+cleanup:
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct memcg_test {
 	int (*fn)(const char *root);
@@ -1783,16 +1910,26 @@ struct memcg_test {
 	T(test_memcg_oom_group_score_events),
 	T(test_memcg_inotify_delete_file),
 	T(test_memcg_inotify_delete_dir),
+	T(test_memcg_dmabuf),
 };
 #undef T
int main(int argc, char **argv) { char root[PATH_MAX]; - int i, proc_status; + int i, proc_status, plan; + const char *filter = NULL; + + if (argc > 1) + filter = argv[1]; + + plan = 0; + for (i = 0; i < ARRAY_SIZE(tests); i++) + if (!filter || !strcmp(tests[i].name, filter)) + plan++;
ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(tests)); + ksft_set_plan(plan); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1818,6 +1955,8 @@ int main(int argc, char **argv) has_localevents = proc_status;
for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (filter && strcmp(tests[i].name, filter)) + continue; switch (tests[i].fn(root)) { case KSFT_PASS: ksft_test_result_pass("%s\n", tests[i].name); diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config index be091f1cdfa04..94c8f33b71a28 100644 --- a/tools/testing/selftests/dmabuf-heaps/config +++ b/tools/testing/selftests/dmabuf-heaps/config @@ -1,3 +1,4 @@ +CONFIG_MEMCG=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_HEAPS_SYSTEM=y CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index fc9694fc4e89e..904332b17698a 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -3,6 +3,7 @@ #include <dirent.h> #include <errno.h> #include <fcntl.h> +#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> @@ -10,11 +11,14 @@ #include <unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/syscall.h> #include <sys/types.h> +#include <sys/wait.h>
#include <linux/dma-buf.h> #include <linux/dma-heap.h> #include <drm/drm.h> +#include "../pidfd/pidfd.h" #include "kselftest.h"
#define DEVPATH "/dev/dma_heap" @@ -320,6 +324,8 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags, __u32 fd; __u32 fd_flags; __u64 heap_flags; + __u32 charge_pid_fd; + __u32 __padding; __u64 garbage1; __u64 garbage2; __u64 garbage3; @@ -328,6 +334,8 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags, .fd = 0, .fd_flags = O_RDWR | O_CLOEXEC, .heap_flags = flags, + .charge_pid_fd = 0, + .__padding = 0, .garbage1 = 0xffffffff, .garbage2 = 0x88888888, .garbage3 = 0x11111111, @@ -390,6 +398,120 @@ static void test_alloc_errors(char *heap_name) close(heap_fd); }
+static int dmabuf_heap_alloc_pidfd(int fd, size_t len, unsigned int heap_flags,
+				   unsigned int charge_pid_fd, int *dmabuf_fd)
+{
+	struct dma_heap_allocation_data data = {
+		.len = len,
+		.fd = 0,
+		.fd_flags = O_RDWR | O_CLOEXEC,
+		.heap_flags = heap_flags,
+		.charge_pid_fd = charge_pid_fd,
+	};
+	int ret;
+
+	if (!dmabuf_fd)
+		return -EINVAL;	/* nowhere to store the new fd */
+
+	ret = ioctl(fd, DMA_HEAP_IOCTL_ALLOC, &data);
+	if (ret < 0)
+		return ret;
+	*dmabuf_fd = (int)data.fd;	/* written only when the ioctl succeeded */
+	return ret;
+}
+
+/*
+ * Probe for charge_pid_fd: supported iff a non-pidfd (/dev/null) is rejected.
+ */
+static bool pidfd_alloc_supported(int heap_fd)
+{
+	int devnull_fd, dmabuf_fd = -1, ret;
+
+	devnull_fd = open("/dev/null", O_RDONLY);	/* deliberately not a pidfd */
+	if (devnull_fd < 0)
+		return false;
+
+	ret = dmabuf_heap_alloc_pidfd(heap_fd, ONE_MEG, 0, devnull_fd, &dmabuf_fd);
+	if (dmabuf_fd >= 0) {	/* allocation went through: release the buffer */
+		close(dmabuf_fd);
+		dmabuf_fd = -1;
+	}
+	close(devnull_fd);
+	return ret < 0;	/* NOTE(review): any alloc failure counts as "supported" */
+}
+
+/*
+ * Test: allocate charging the calling process's own cgroup via a self pidfd.
+ */
+static void test_alloc_pidfd_self(char *heap_name)
+{
+	int heap_fd = -1, pidfd = -1, dmabuf_fd = -1, ret;
+
+	heap_fd = dmabuf_heap_open(heap_name);
+
+	if (!pidfd_alloc_supported(heap_fd)) {
+		ksft_test_result_skip("charge_pid_fd not supported by this kernel\n");
+		goto out;
+	}
+
+	pidfd = sys_pidfd_open(getpid(), 0);
+	if (pidfd < 0) {
+		ksft_test_result_skip("pidfd_open not available\n");
+		goto out;
+	}
+
+	ret = dmabuf_heap_alloc_pidfd(heap_fd, ONE_MEG, 0, pidfd, &dmabuf_fd);
+	ksft_test_result(!ret, "Allocation with self pidfd %d\n", ret);
+	if (dmabuf_fd >= 0)
+		close(dmabuf_fd);
+	close(pidfd);
+out:
+	close(heap_fd);
+}
+
+/*
+ * Test: allocate charging a child process's cgroup via a child pidfd.
+ */
+static void test_alloc_pidfd_child(char *heap_name)
+{
+	int heap_fd = -1, pidfd = -1, dmabuf_fd = -1;
+	pid_t child_pid;
+	int status, ret;
+
+	heap_fd = dmabuf_heap_open(heap_name);
+
+	if (!pidfd_alloc_supported(heap_fd)) {
+		ksft_test_result_skip("charge_pid_fd not supported by this kernel\n");
+		goto out;
+	}
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		pause();	/* stay alive until the parent sends SIGTERM */
+		_exit(0);
+	}
+	if (child_pid < 0)
+		ksft_exit_fail_msg("fork failed: %s\n", strerror(errno));
+
+	pidfd = sys_pidfd_open(child_pid, 0);
+	if (pidfd < 0) {
+		kill(child_pid, SIGTERM);	/* stop and reap the helper before skipping */
+		waitpid(child_pid, &status, 0);
+		ksft_test_result_skip("pidfd_open for child failed\n");
+		goto out;
+	}
+
+	ret = dmabuf_heap_alloc_pidfd(heap_fd, ONE_MEG, 0, pidfd, &dmabuf_fd);
+	ksft_test_result(!ret, "Allocation with child pidfd %d\n", ret);
+	if (dmabuf_fd >= 0)
+		close(dmabuf_fd);
+	close(pidfd);
+	kill(child_pid, SIGTERM);
+	waitpid(child_pid, &status, 0);
+out:
+	close(heap_fd);
+}
+
 static int numer_of_heaps(void)
 {
 	DIR *d = opendir(DEVPATH);
@@ -420,7 +542,7 @@ int main(void)
 		return KSFT_SKIP;
 	}
- ksft_set_plan(11 * numer_of_heaps()); + ksft_set_plan(13 * numer_of_heaps());
while ((dir = readdir(d))) { if (!strncmp(dir->d_name, ".", 2)) @@ -435,6 +557,8 @@ int main(void) test_alloc_zeroed(dir->d_name, ONE_MEG); test_alloc_compat(dir->d_name); test_alloc_errors(dir->d_name); + test_alloc_pidfd_self(dir->d_name); + test_alloc_pidfd_child(dir->d_name); } closedir(d);
diff --git a/tools/testing/selftests/dmabuf-heaps/vmtest.sh b/tools/testing/selftests/dmabuf-heaps/vmtest.sh
new file mode 100755
index 0000000000000..6f1a878384127
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/vmtest.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Run the dma-buf heap charge_pid_fd selftests inside a virtme-ng VM.
+# Copyright (c) 2026 Red Hat
+#
+# Dependencies:
+# * virtme-ng
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+readonly CGROUP_DIR="${KERNEL_CHECKOUT}/tools/testing/selftests/cgroup"
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly DMABUF_HEAP_TEST="${SCRIPT_DIR}"/dmabuf-heap
+readonly MEMCONTROL_TEST="${CGROUP_DIR}"/test_memcontrol
+readonly TMP_DIR=$(mktemp -d /tmp/dmabuf-vmtest.XXXXXXXX)
+
+VERBOSE=false
+BUILD=false
+BUILD_HOST=""
+BUILD_HOST_PODMAN_CONTAINER_NAME=""
+
+usage() {
+	echo
+	echo "$0 [OPTIONS]"
+	echo
+	echo "Options"
+	echo " -b: build the kernel from the current source tree and use it for the VM"
+	echo " -H: hostname for remote build host (used with -b)"
+	echo " -p: podman container name for remote build host (used with -b)"
+	echo " Example: -H beefyserver -p vng"
+
+	echo " -v: enable verbose vng/qemu output"
+	echo
+
+	exit 1
+}
+
+die() {
+	echo "$*" >&2
+	exit "${KSFT_FAIL}"
+}
+
+cleanup() {
+	rm -rf "${TMP_DIR}"
+}
+
+check_deps() {	# exits with KSFT_SKIP when a tool or test binary is missing
+	for dep in vng make; do
+		if [[ ! -x $(command -v "${dep}") ]]; then
+			echo -e "skip: dependency ${dep} not found!\n"
+			exit "${KSFT_SKIP}"
+		fi
+	done
+
+	if [[ ! -x "${DMABUF_HEAP_TEST}" ]]; then
+		printf "skip: %s not found!" "${DMABUF_HEAP_TEST}"
+		printf " Please build the kselftest dmabuf-heaps target (or use -b).\n"
+		exit "${KSFT_SKIP}"
+	fi
+
+	if [[ ! -x "${MEMCONTROL_TEST}" ]]; then
+		printf "skip: %s not found!" "${MEMCONTROL_TEST}"
+		printf " Please build the kselftest cgroup target (or use -b).\n"
+		exit "${KSFT_SKIP}"
+	fi
+}
+
+check_vng() {	# only warns on untested vng versions, never aborts
+	local tested_versions=("1.36" "1.37")
+	local version
+	local ok=0
+
+	version="$(vng --version)"
+	for tv in "${tested_versions[@]}"; do
+		if [[ "${version}" == *"${tv}"* ]]; then
+			ok=1
+			break
+		fi
+	done
+
+	if [[ "${ok}" -eq 0 ]]; then
+		printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+		printf "not function properly.\n\tThe following versions have been tested: " >&2
+		echo "${tested_versions[@]}" >&2
+	fi
+}
+
+build_selftests() {	# installs UAPI headers into TMP_DIR, then builds both suites
+	make -C "${KERNEL_CHECKOUT}" headers_install \
+		INSTALL_HDR_PATH="${TMP_DIR}/usr" -j"$(nproc)"
+
+	local khdr="-isystem ${TMP_DIR}/usr/include"
+
+	if ! make -C "${SCRIPT_DIR}" KHDR_INCLUDES="${khdr}" -j"$(nproc)"; then
+		die "failed to build dmabuf-heaps selftests"
+	fi
+
+	if ! make -C "${CGROUP_DIR}" KHDR_INCLUDES="${khdr}" \
+			"${MEMCONTROL_TEST}" -j"$(nproc)"; then
+		die "failed to build cgroup/test_memcontrol selftest"
+	fi
+}
+
+handle_build() {	# no-op unless -b was given
+	if ! ${BUILD}; then
+		return
+	fi
+
+	if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+		echo "-b requires vmtest.sh called from the kernel source tree" >&2
+		exit 1
+	fi
+
+	pushd "${KERNEL_CHECKOUT}" &>/dev/null
+
+	if ! vng --kconfig --config "${SCRIPT_DIR}/config"; then
+		die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
+	fi
+
+	local vng_args=("-v" "--config" "${SCRIPT_DIR}/config" "--build")
+
+	if [[ -n "${BUILD_HOST}" ]]; then
+		vng_args+=("--build-host" "${BUILD_HOST}")
+	fi
+
+	if [[ -n "${BUILD_HOST_PODMAN_CONTAINER_NAME}" ]]; then
+		vng_args+=("--build-host-exec-prefix" \
+			   "podman exec -ti ${BUILD_HOST_PODMAN_CONTAINER_NAME}")
+	fi
+
+	if ! vng "${vng_args[@]}"; then
+		die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+	fi
+
+	build_selftests
+
+	popd &>/dev/null
+}
+
+make_runner() {
+	# TMP_DIR is visible in the VM at the same path (virtme-ng shares the
+	# host fs). Escaped \$ expands in the VM; ${...} expands right here.
+	cat > "${TMP_DIR}/run_tests.sh" <<-EOF
+		#!/bin/sh
+		set -u
+		PASS=0; FAIL=0; SKIP=0; N=0
+
+		run() {
+			name="\$1"; shift
+			N=\$((N+1))
+			"\$@"; rc=\$?
+			if [ "\$rc" -eq 0 ]; then echo "ok \$N \$name"; PASS=\$((PASS+1))
+			elif [ "\$rc" -eq 4 ]; then echo "ok \$N \$name # SKIP"; SKIP=\$((SKIP+1))
+			else echo "not ok \$N \$name"; FAIL=\$((FAIL+1))
+			fi
+		}
+
+		run "dmabuf-heap charge_pid_fd ioctl" "${DMABUF_HEAP_TEST}"
+		run "memcontrol dma-buf memcg" "${MEMCONTROL_TEST}" test_memcg_dmabuf
+		echo "# PASS=\$PASS SKIP=\$SKIP FAIL=\$FAIL"
+		[ "\$FAIL" -eq 0 ]
+	EOF
+	chmod +x "${TMP_DIR}/run_tests.sh"
+}
+
+run_vm() {
+	local verbose_opt=""
+	local kernel_opt=""
+
+	${VERBOSE} && verbose_opt="--verbose"
+
+	# If we are running from within the kernel source tree, use the kernel
+	# source tree as the kernel to boot, otherwise use the running kernel.
+	if [[ "$(realpath "$(pwd)")" == "${KERNEL_CHECKOUT}"* ]]; then
+		kernel_opt="${KERNEL_CHECKOUT}"
+	fi
+
+	vng --run ${kernel_opt} ${verbose_opt} --user root --memory 512M \
+		--exec "${TMP_DIR}/run_tests.sh"
+}
+
+trap cleanup EXIT	# installed before option parsing so usage() also removes TMP_DIR
+
+while getopts :hvbH:p: o
+do
+	case $o in
+		v) VERBOSE=true;;
+		b) BUILD=true;;
+		H) BUILD_HOST=$OPTARG;;
+		p) BUILD_HOST_PODMAN_CONTAINER_NAME=$OPTARG;;
+		h|*) usage;;
+	esac
+done
+shift $((OPTIND-1))
+
+check_vng
+handle_build
+check_deps
+make_runner
+
+echo "Booting VM and running tests..."
+run_vm