This series adds support for tests that use multiple devices, and adds
one new test, vfio_pci_device_init_perf_test, which measures parallel
device initialization time to demonstrate the improvement from commit
e908f58b6beb ("vfio/pci: Separate SR-IOV VF dev_set").
This series also breaks apart the monolithic vfio_util.h and
vfio_pci_device.c into separate files, to account for all the new code.
This required quite a bit of code motion so the diffstat looks large.
The final layout is more granular and provides a better separation of
the IOMMU code from the device code.
Final layout:
C files:
- tools/testing/selftests/vfio/lib/iommu.c
- tools/testing/selftests/vfio/lib/iova_allocator.c
- tools/testing/selftests/vfio/lib/libvfio.c
- tools/testing/selftests/vfio/lib/vfio_pci_device.c
- tools/testing/selftests/vfio/lib/vfio_pci_driver.c
H files:
- tools/testing/selftests/vfio/lib/include/libvfio.h
- tools/testing/selftests/vfio/lib/include/libvfio/assert.h
- tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
- tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
- tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
- tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
Notably, vfio_util.h is now gone and replaced with libvfio.h.
This series is based on vfio/next plus Alex Mastro's series to add the
IOVA allocator [1]. It should apply cleanly to vfio/next once Alex's
series is merged into 6.18 and then into vfio/next.
This series can be found on GitHub:
https://github.com/dmatlack/linux/tree/vfio/selftests/init_perf_test/v2
[1] https://lore.kernel.org/kvm/20251111-iova-ranges-v3-0-7960244642c5@fb.com/
Cc: Alex Mastro <amastro(a)fb.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Josh Hilke <jrhilke(a)google.com>
Cc: Raghavendra Rao Ananta <rananta(a)google.com>
Cc: Vipin Sharma <vipinsh(a)google.com>
v2:
- Require tests to call iommu_init() and manage struct iommu objects
rather than implicitly doing it in vfio_pci_device_init().
- Drop all the device wrappers for IOMMU methods and require tests to
interact with the iommu_*() helper functions directly.
- Add a commit to eliminate INVALID_IOVA. This is a simple cleanup I've
been meaning to make.
- Upgrade some driver logging to error (Raghavendra)
- Remove plurality from helper function that fetches BDF from
environment variable (Raghavendra)
- Fix cleanup.sh to only delete the device directory when cleaning up
all devices (Raghavendra)
v1: https://lore.kernel.org/kvm/20251008232531.1152035-1-dmatlack@google.com/
David Matlack (18):
vfio: selftests: Move run.sh into scripts directory
vfio: selftests: Split run.sh into separate scripts
vfio: selftests: Allow passing multiple BDFs on the command line
vfio: selftests: Rename struct vfio_iommu_mode to iommu_mode
vfio: selftests: Introduce struct iommu
vfio: selftests: Support multiple devices in the same
container/iommufd
vfio: selftests: Eliminate overly chatty logging
vfio: selftests: Prefix logs with device BDF where relevant
vfio: selftests: Upgrade driver logging to dev_err()
vfio: selftests: Rename struct vfio_dma_region to dma_region
vfio: selftests: Move IOMMU library code into iommu.c
vfio: selftests: Move IOVA allocator into iova_allocator.c
vfio: selftests: Stop passing device for IOMMU operations
vfio: selftests: Rename vfio_util.h to libvfio.h
vfio: selftests: Move vfio_selftests_*() helpers into libvfio.c
vfio: selftests: Split libvfio.h into separate header files
vfio: selftests: Eliminate INVALID_IOVA
vfio: selftests: Add vfio_pci_device_init_perf_test
tools/testing/selftests/vfio/Makefile | 9 +-
.../selftests/vfio/lib/drivers/dsa/dsa.c | 36 +-
.../selftests/vfio/lib/drivers/ioat/ioat.c | 18 +-
.../selftests/vfio/lib/include/libvfio.h | 26 +
.../vfio/lib/include/libvfio/assert.h | 54 ++
.../vfio/lib/include/libvfio/iommu.h | 76 +++
.../vfio/lib/include/libvfio/iova_allocator.h | 23 +
.../lib/include/libvfio/vfio_pci_device.h | 125 ++++
.../lib/include/libvfio/vfio_pci_driver.h | 97 +++
.../selftests/vfio/lib/include/vfio_util.h | 331 -----------
tools/testing/selftests/vfio/lib/iommu.c | 465 +++++++++++++++
.../selftests/vfio/lib/iova_allocator.c | 94 +++
tools/testing/selftests/vfio/lib/libvfio.c | 78 +++
tools/testing/selftests/vfio/lib/libvfio.mk | 5 +-
.../selftests/vfio/lib/vfio_pci_device.c | 555 +-----------------
.../selftests/vfio/lib/vfio_pci_driver.c | 16 +-
tools/testing/selftests/vfio/run.sh | 109 ----
.../testing/selftests/vfio/scripts/cleanup.sh | 41 ++
tools/testing/selftests/vfio/scripts/lib.sh | 42 ++
tools/testing/selftests/vfio/scripts/run.sh | 16 +
tools/testing/selftests/vfio/scripts/setup.sh | 48 ++
.../selftests/vfio/vfio_dma_mapping_test.c | 46 +-
.../selftests/vfio/vfio_iommufd_setup_test.c | 2 +-
.../vfio/vfio_pci_device_init_perf_test.c | 167 ++++++
.../selftests/vfio/vfio_pci_device_test.c | 12 +-
.../selftests/vfio/vfio_pci_driver_test.c | 51 +-
26 files changed, 1479 insertions(+), 1063 deletions(-)
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio.h
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/assert.h
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
delete mode 100644 tools/testing/selftests/vfio/lib/include/vfio_util.h
create mode 100644 tools/testing/selftests/vfio/lib/iommu.c
create mode 100644 tools/testing/selftests/vfio/lib/iova_allocator.c
create mode 100644 tools/testing/selftests/vfio/lib/libvfio.c
delete mode 100755 tools/testing/selftests/vfio/run.sh
create mode 100755 tools/testing/selftests/vfio/scripts/cleanup.sh
create mode 100755 tools/testing/selftests/vfio/scripts/lib.sh
create mode 100755 tools/testing/selftests/vfio/scripts/run.sh
create mode 100755 tools/testing/selftests/vfio/scripts/setup.sh
create mode 100644 tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c
base-commit: 0ed3a30fd996cb0cac872432cf25185fda7e5316
prerequisite-patch-id: dcf23dcc1198960bda3102eefaa21df60b2e4c54
prerequisite-patch-id: e32e56d5bf7b6c7dd40d737aa3521560407e00f5
prerequisite-patch-id: 4f79a41bf10a4c025ba5f433551b46035aa15878
prerequisite-patch-id: f903a45f0c32319138cd93a007646ab89132b18c
--
2.52.0.rc1.455.g30608eb744-goog
Main objective of this series is to convert the gro.sh and toeplitz.sh
tests to be "NIPA-compatible" - meaning make use of the Python env,
which lets us run the tests against either netdevsim or a real device.
The tests seem to have been written with a different flow in mind.
Namely they source different bash "setup" scripts depending on arguments
passed to the test. While I have nothing against the use of bash and
the overall architecture - the existing code needs quite a bit of work
(don't assume MAC/IP addresses, support remote endpoint over SSH).
If I'm the one fixing it, I'd rather convert them to our "simplistic"
Python.
This series rewrites the tests in Python while addressing their
shortcomings. The functionality of running the test over loopback
on a real device is retained but with a different method of invocation
(see the last patch).
Once again we are dealing with a script which runs over a variety of
protocols (combination of [ipv4, ipv6, ipip] x [tcp, udp]). The first
4 patches add support for test variants to our scripts. We use the
term "variant" in the same sense as the C kselftest_harness.h -
variant is just a set of static input arguments.
Note that neither GRO nor the Toeplitz test fully passes for me on
any HW I have access to. But this is unrelated to the conversion.
This series is not making any real functional changes to the tests,
it is limited to improving the "test harness" scripts.
v2:
[patch 5] fix accidental modification of gitignore
[patch 8] fix typo in "compared"
[patch 9] fix typo I -> It
[patch 10] fix typo configure -> configured
v1: https://lore.kernel.org/20251117205810.1617533-1-kuba@kernel.org
Jakub Kicinski (12):
selftests: net: py: coding style improvements
selftests: net: py: extract the case generation logic
selftests: net: py: add test variants
selftests: drv-net: xdp: use variants for qstat tests
selftests: net: relocate gro and toeplitz tests to drivers/net
selftests: net: py: support ksft ready without wait
selftests: net: py: read ip link info about remote dev
netdevsim: pass packets thru GRO on Rx
selftests: drv-net: add a Python version of the GRO test
selftests: drv-net: hw: convert the Toeplitz test to Python
netdevsim: add loopback support
selftests: net: remove old setup_* scripts
tools/testing/selftests/drivers/net/Makefile | 2 +
.../testing/selftests/drivers/net/hw/Makefile | 6 +-
tools/testing/selftests/net/Makefile | 7 -
tools/testing/selftests/net/lib/Makefile | 1 +
drivers/net/netdevsim/netdev.c | 26 ++-
.../testing/selftests/{ => drivers}/net/gro.c | 5 +-
.../{net => drivers/net/hw}/toeplitz.c | 7 +-
.../testing/selftests/drivers/net/.gitignore | 1 +
tools/testing/selftests/drivers/net/gro.py | 161 ++++++++++++++
.../selftests/drivers/net/hw/.gitignore | 1 +
.../drivers/net/hw/lib/py/__init__.py | 4 +-
.../selftests/drivers/net/hw/toeplitz.py | 208 ++++++++++++++++++
.../selftests/drivers/net/lib/py/__init__.py | 4 +-
.../selftests/drivers/net/lib/py/env.py | 2 +
tools/testing/selftests/drivers/net/xdp.py | 42 ++--
tools/testing/selftests/net/.gitignore | 2 -
tools/testing/selftests/net/gro.sh | 105 ---------
.../selftests/net/lib/ksft_setup_loopback.sh | 111 ++++++++++
.../testing/selftests/net/lib/py/__init__.py | 5 +-
tools/testing/selftests/net/lib/py/ksft.py | 93 ++++++--
tools/testing/selftests/net/lib/py/nsim.py | 2 +-
tools/testing/selftests/net/lib/py/utils.py | 20 +-
tools/testing/selftests/net/setup_loopback.sh | 120 ----------
tools/testing/selftests/net/setup_veth.sh | 45 ----
tools/testing/selftests/net/toeplitz.sh | 199 -----------------
.../testing/selftests/net/toeplitz_client.sh | 28 ---
26 files changed, 630 insertions(+), 577 deletions(-)
rename tools/testing/selftests/{ => drivers}/net/gro.c (99%)
rename tools/testing/selftests/{net => drivers/net/hw}/toeplitz.c (99%)
create mode 100755 tools/testing/selftests/drivers/net/gro.py
create mode 100755 tools/testing/selftests/drivers/net/hw/toeplitz.py
delete mode 100755 tools/testing/selftests/net/gro.sh
create mode 100755 tools/testing/selftests/net/lib/ksft_setup_loopback.sh
delete mode 100644 tools/testing/selftests/net/setup_loopback.sh
delete mode 100644 tools/testing/selftests/net/setup_veth.sh
delete mode 100755 tools/testing/selftests/net/toeplitz.sh
delete mode 100755 tools/testing/selftests/net/toeplitz_client.sh
--
2.51.1
This patch adds support for the Zalasr ISA extension, which supplies the
real load acquire/store release instructions.
The specification can be found here:
https://github.com/riscv/riscv-zalasr/blob/main/chapter2.adoc
This patch series has been tested with ltp on Qemu with Brensan's zalasr
support patch[1].
Some false-positive spacing errors are reported during patch checking, so I
CCed the maintainers of checkpatch.pl as well.
[1] https://lore.kernel.org/all/CAGPSXwJEdtqW=nx71oufZp64nK6tK=0rytVEcz4F-gfvCO…
v4:
- Apply acquire/release semantics to arch_atomic operations. Thanks
to Andrea.
v3:
- Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations
so as to ensure FENCE.TSO ordering between operations which precede the
UNLOCK+LOCK sequence and operations which follow the sequence. Thanks
to Andrea.
- Support hwprobe of Zalasr.
- Allow Zalasr extensions for Guest/VM.
v2:
- Adjust the order of Zalasr and Zalrsc in dt-bindings. Thanks to
Conor.
Xu Lu (10):
riscv: Add ISA extension parsing for Zalasr
dt-bindings: riscv: Add Zalasr ISA extension description
riscv: hwprobe: Export Zalasr extension
riscv: Introduce Zalasr instructions
riscv: Apply Zalasr to smp_load_acquire/smp_store_release
riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg
operations
riscv: Apply acquire/release semantics to arch_atomic operations
riscv: Remove arch specific __atomic_acquire/release_fence
RISC-V: KVM: Allow Zalasr extensions for Guest/VM
RISC-V: KVM: selftests: Add Zalasr extensions to get-reg-list test
Documentation/arch/riscv/hwprobe.rst | 5 +-
.../devicetree/bindings/riscv/extensions.yaml | 5 +
arch/riscv/include/asm/atomic.h | 70 ++++++++-
arch/riscv/include/asm/barrier.h | 91 +++++++++--
arch/riscv/include/asm/cmpxchg.h | 144 +++++++++---------
arch/riscv/include/asm/fence.h | 4 -
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/include/asm/insn-def.h | 79 ++++++++++
arch/riscv/include/uapi/asm/hwprobe.h | 1 +
arch/riscv/include/uapi/asm/kvm.h | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
arch/riscv/kernel/sys_hwprobe.c | 1 +
arch/riscv/kvm/vcpu_onereg.c | 2 +
.../selftests/kvm/riscv/get-reg-list.c | 4 +
14 files changed, 314 insertions(+), 95 deletions(-)
--
2.20.1
Currently, guard regions are not visible to users except through
/proc/$pid/pagemap, with no explicit visibility at the VMA level.
This makes the feature less useful, as it isn't entirely apparent which
VMAs may have these entries present, especially when performing actions
which walk through memory regions such as those performed by CRIU.
This series addresses this issue by introducing the VM_MAYBE_GUARD flag
which fulfils this role, updating the smaps logic to display an entry for
these.
The semantics of this flag are that a guard region MAY be present if set
(we cannot be sure, as we can't efficiently track whether an
MADV_GUARD_REMOVE finally removes all the guard regions in a VMA) - but if
not set the VMA definitely does NOT have any guard regions present.
It's problematic to establish this flag without further action, because
that means that VMAs with guard regions in them become non-mergeable with
adjacent VMAs for no especially good reason.
To work around this, this series also introduces the concept of 'sticky'
VMA flags - that is flags which:
a. if set in one VMA and not in another still permit those VMAs to be
merged (if otherwise compatible).
b. When they are merged, the resultant VMA must have the flag set.
The VMA logic is updated to propagate these flags correctly.
Additionally, VM_MAYBE_GUARD being an explicit VMA flag allows us to solve
an issue with file-backed guard regions - previously these established an
anon_vma object for file-backed mappings solely to have vma_needs_copy()
correctly propagate guard region mappings to child processes.
We introduce a new flag alias VM_COPY_ON_FORK (which currently only
specifies VM_MAYBE_GUARD) and update vma_needs_copy() to check explicitly
for this flag and to copy page tables if it is present, which resolves this
issue.
Additionally, we add the ability for allow-listed VMA flags to be
atomically writable with only mmap/VMA read locks held.
The only flag we allow so far is VM_MAYBE_GUARD, which we carefully ensure
does not cause any races by being allowed to do so.
This allows us to maintain guard region installation as a read-locked
operation and not endure the overhead of obtaining a write lock here.
Finally we introduce extensive VMA userland tests to assert that the sticky
VMA logic behaves correctly as well as guard region self tests to assert
that smaps visibility is correctly implemented.
v3:
* Propagated tags thanks Vlastimil & Pedro! :)
* Fixed doc nit as per Pedro.
* Added vma_flag_test_atomic() in preparation for fixing
retract_page_tables() (see below). We make this not require any locks, as
we serialise on the page table lock in retract_page_tables().
* Split the atomic flag enablement and actually setting the flag for guard
install into two separate commits so we clearly separate the various VMA
flag implementation details and us enabling this feature.
* Mentioned setting anon_vma for anonymous mappings in commit message as
per Vlastimil.
* Fixed an issue with retract_page_tables() whereby madvise(...,
MADV_COLLAPSE) relies upon file-backed VMAs not being collapsed due to
the UFFD WP VMA flag being set or the VMA having vma->anon_vma set
(i.e. being a MAP_PRIVATE file-backed VMA). This was updated to also
check for VM_MAYBE_GUARD.
* Introduced MADV_COLLAPSE self test to assert that the behaviour is
correct. I first reproduced the issue locally and then adapted the test
to assert that this no longer occurs.
* Mentioned KCSAN permissiveness in commit message as per Pedro.
* Mentioned mmap/VMA read lock excluding mmap/VMA write lock and thus
avoiding meaningful RMW races in commit message as per Vlastimil.
* Mentioned previous unconditional vma->anon_vma installation on guard
region installation as per Vlastimil.
* Avoided having merging compromised by reordering patches such that the
sticky VMA functionality is implemented prior to VM_MAYBE_GUARD being
utilised upon guard region installation, rendering Vlastimil's request to
mention this in a commit message unnecessary.
* Separated out sticky and copy on fork patches as per Pedro.
* Added VM_PFNMAP, VM_MIXEDMAP, VM_UFFD_WP to VM_COPY_ON_FORK to make
things more consistent and clean.
* Added mention of why generally VM_STICKY should be VM_COPY_ON_FORK in
copy on fork patch.
v2:
* Separated out userland VMA tests for sticky behaviour as per Suren.
* Added the concept of atomic writable VMA flags as per Pedro and Vlastimil.
* Made VM_MAYBE_GUARD an atomic writable flag so we don't have to take a VMA
write lock in madvise() as per Pedro and Vlastimil.
https://lore.kernel.org/all/cover.1762422915.git.lorenzo.stoakes@oracle.com/
v1:
https://lore.kernel.org/all/cover.1761756437.git.lorenzo.stoakes@oracle.com/
Lorenzo Stoakes (8):
mm: introduce VM_MAYBE_GUARD and make visible in /proc/$pid/smaps
mm: add atomic VMA flags and set VM_MAYBE_GUARD as such
mm: implement sticky VMA flags
mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one
mm: set the VM_MAYBE_GUARD flag on guard region install
tools/testing/vma: add VMA sticky userland tests
tools/testing/selftests/mm: add MADV_COLLAPSE test case
tools/testing/selftests/mm: add smaps visibility guard region test
Documentation/filesystems/proc.rst | 5 +-
fs/proc/task_mmu.c | 1 +
include/linux/mm.h | 102 ++++++++++++
include/trace/events/mmflags.h | 1 +
mm/khugepaged.c | 72 +++++---
mm/madvise.c | 22 ++-
mm/memory.c | 14 +-
mm/vma.c | 22 +--
tools/testing/selftests/mm/guard-regions.c | 185 +++++++++++++++++++++
tools/testing/selftests/mm/vm_util.c | 5 +
tools/testing/selftests/mm/vm_util.h | 1 +
tools/testing/vma/vma.c | 89 ++++++++--
tools/testing/vma/vma_internal.h | 56 +++++++
13 files changed, 511 insertions(+), 64 deletions(-)
--
2.51.0
The vector regset uses the maximum possible vlenb 8192 to allocate a
2^18-byte buffer to copy the vector register. But most platforms
don't support the largest vlenb.
The regset has 2 users, ptrace syscall and coredump. When handling the
PTRACE_GETREGSET requests from ptrace syscall, Linux will prepare a
kernel buffer whose size is min(user buffer size, limit). A malicious
user process might overwhelm a memory-constrained system when the
buffer limit is very large. The coredump uses regset_get_alloc() to
get the context of vector register. But this API allocates the buffer
before checking whether the target process uses the vector extension,
which wastes time preparing a large memory buffer.
The buffer limit can be determined after getting the platform vlenb in
the early boot stage, which lets the regset buffer match real hardware
limits. Also add .active callbacks to let the coredump skip the vector
part when the target process doesn't use it.
After this patchset, userspace process needs 2 ptrace syscalls to
retrieve the vector regset with PTRACE_GETREGSET. The first ptrace call
only reads the header to get the vlenb information. Then prepare a
suitable buffer to get the register context. The new vector ptrace
kselftest demonstrates it.
---
v2:
- fix issues in vector ptrace kselftest (Andy)
Yong-Xuan Wang (2):
riscv: ptrace: Optimize the allocation of vector regset
selftests: riscv: Add test for the Vector ptrace interface
arch/riscv/include/asm/vector.h | 1 +
arch/riscv/kernel/ptrace.c | 24 +++-
arch/riscv/kernel/vector.c | 2 +
tools/testing/selftests/riscv/vector/Makefile | 5 +-
.../selftests/riscv/vector/vstate_ptrace.c | 134 ++++++++++++++++++
5 files changed, 162 insertions(+), 4 deletions(-)
create mode 100644 tools/testing/selftests/riscv/vector/vstate_ptrace.c
--
2.43.0
The user_notification_wait_killable_after_reply test fails due to an
unhandled error when a traced syscall is interrupted by a signal.
When a signal arrives after the tracer has received a seccomp
notification but before it has replied, the notification can become
stale. Any subsequent reply (like with SECCOMP_IOCTL_NOTIF_ADDFD)
will fail with -ENOENT.
This patch fixes the test by handling the -ENOENT return value from
SECCOMP_IOCTL_NOTIF_ADDFD, preventing the test from failing
incorrectly. The loop counter is decremented to re-run the iteration
for the restarted syscall.
Signed-off-by: Wake Liu <wakel(a)google.com>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 574fdd102eb5..c3e598c9c4ee 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -5048,8 +5048,12 @@ TEST(user_notification_wait_killable_after_reply)
addfd.id = req.id;
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
addfd.srcfd = 0;
- ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
- kill(pid, SIGKILL);
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ if (ret < 0 && errno == ENOENT) {
+ i--;
+ continue;
+ }
+ ASSERT_GE(ret, 0);
}
/*
--
2.52.0.rc1.455.g30608eb744-goog
Dzień dobry,
pomagamy przedsiębiorcom wprowadzić model wymiany walut, który minimalizuje wahania kosztów przy rozliczeniach międzynarodowych.
Kiedy możemy umówić się na 15-minutową rozmowę, aby zaprezentować, jak taki model mógłby działać w Państwa firmie - z gwarancją indywidualnych kursów i pełnym uproszczeniem płatności? Proszę o propozycję dogodnego terminu.
Pozdrawiam
Marek Poradecki
This commit introduces checks for kernel version and seccomp filter flag
support to the seccomp selftests. It also includes conditional header
inclusions using __GLIBC_PREREQ.
Some tests were gated by kernel version, and adjustments were made for
flags introduced after kernel 5.4. This ensures the selftests can run
and pass correctly on kernel versions 5.4 and later, preventing failures
due to features not present in older kernels.
The use of __GLIBC_PREREQ ensures proper compilation and functionality
across different glibc versions in a mainline Linux kernel context.
While it might appear redundant in specific build environments due to
global overrides, it is crucial for upstream correctness and portability.
Signed-off-by: Wake Liu <wakel(a)google.com>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 108 ++++++++++++++++--
1 file changed, 99 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 61acbd45ffaa..9b660cff5a4a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -13,12 +13,14 @@
* we need to use the kernel's siginfo.h file and trick glibc
* into accepting it.
*/
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif
+#endif
#include <errno.h>
#include <linux/filter.h>
@@ -300,6 +302,26 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
}
#endif
+int seccomp_flag_supported(int flag)
+{
+ /*
+ * Probes if a seccomp filter flag is supported by the kernel.
+ *
+ * When an unsupported flag is passed to seccomp(SECCOMP_SET_MODE_FILTER, ...),
+ * the kernel returns EINVAL.
+ *
+ * When a supported flag is passed, the kernel proceeds to validate the
+ * filter program pointer. By passing NULL for the filter program,
+ * the kernel attempts to dereference a bad address, resulting in EFAULT.
+ *
+ * Therefore, checking for EFAULT indicates that the flag itself was
+ * recognized and supported by the kernel.
+ */
+ if (seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL) == -1 && errno == EFAULT)
+ return 1;
+ return 0;
+}
+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -2436,13 +2458,12 @@ TEST(detect_seccomp_filter_flags)
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
- EXPECT_EQ(-1, ret);
- EXPECT_EQ(EFAULT, errno) {
- TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
- flag);
- }
- all_flags |= flag;
+ if (seccomp_flag_supported(flag))
+ all_flags |= flag;
+ else
+ TH_LOG("Filter flag (0x%X) is not found to be supported!",
+ flag);
}
/*
@@ -2870,6 +2891,12 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
{
+ /* Depends on 5189149 (seccomp: allow TSYNC and USER_NOTIF together) */
+ if (!seccomp_flag_supported(SECCOMP_FILTER_FLAG_TSYNC_ESRCH)) {
+ SKIP(return, "Kernel does not support SECCOMP_FILTER_FLAG_TSYNC_ESRCH");
+ return;
+ }
+
long ret, flags;
void *status;
@@ -3475,6 +3502,11 @@ TEST(user_notification_basic)
TEST(user_notification_with_tsync)
{
+ /* Depends on 5189149 (seccomp: allow TSYNC and USER_NOTIF together) */
+ if (!seccomp_flag_supported(SECCOMP_FILTER_FLAG_TSYNC_ESRCH)) {
+ SKIP(return, "Kernel does not support SECCOMP_FILTER_FLAG_TSYNC_ESRCH");
+ return;
+ }
int ret;
unsigned int flags;
@@ -3966,6 +3998,13 @@ TEST(user_notification_filter_empty)
TEST(user_ioctl_notification_filter_empty)
{
+ /* Depends on 95036a7 (seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV
+ * when all users have exited) */
+ if (!ksft_min_kernel_version(6, 11)) {
+ SKIP(return, "Kernel version < 6.11");
+ return;
+ }
+
pid_t pid;
long ret;
int status, p[2];
@@ -4119,6 +4158,12 @@ int get_next_fd(int prev_fd)
TEST(user_notification_addfd)
{
+ /* Depends on 0ae71c7 (seccomp: Support atomic "addfd + send reply") */
+ if (!ksft_min_kernel_version(5, 14)) {
+ SKIP(return, "Kernel version < 5.14");
+ return;
+ }
+
pid_t pid;
long ret;
int status, listener, memfd, fd, nextfd;
@@ -4281,6 +4326,12 @@ TEST(user_notification_addfd)
TEST(user_notification_addfd_rlimit)
{
+ /* Depends on 7cf97b1 (seccomp: Introduce addfd ioctl to seccomp user notifier) */
+ if (!ksft_min_kernel_version(5, 9)) {
+ SKIP(return, "Kernel version < 5.9");
+ return;
+ }
+
pid_t pid;
long ret;
int status, listener, memfd;
@@ -4326,9 +4377,12 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
- addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
- EXPECT_EQ(errno, EMFILE);
+ /* Depends on 0ae71c7 (seccomp: Support atomic "addfd + send reply") */
+ if (ksft_min_kernel_version(5, 14)) {
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+ }
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
@@ -4356,6 +4410,12 @@ TEST(user_notification_addfd_rlimit)
TEST(user_notification_sync)
{
+ /* Depends on 48a1084 (seccomp: add the synchronous mode for seccomp_unotify) */
+ if (!ksft_min_kernel_version(6, 6)) {
+ SKIP(return, "Kernel version < 6.6");
+ return;
+ }
+
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
int status, listener;
@@ -4520,6 +4580,12 @@ static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
TEST(user_notification_fifo)
{
+ /* Depends on 4cbf6f6 (seccomp: Use FIFO semantics to order notifications) */
+ if (!ksft_min_kernel_version(5, 19)) {
+ SKIP(return, "Kernel version < 5.19");
+ return;
+ }
+
struct seccomp_notif_resp resp = {};
struct seccomp_notif req = {};
int i, status, listener;
@@ -4623,6 +4689,12 @@ static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
/* Ensure non-fatal signals prior to receive are unmodified */
TEST(user_notification_wait_killable_pre_notification)
{
+ /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */
+ if (!ksft_min_kernel_version(5, 19)) {
+ SKIP(return, "Kernel version < 5.19");
+ return;
+ }
+
struct sigaction new_action = {
.sa_handler = signal_handler,
};
@@ -4693,6 +4765,12 @@ TEST(user_notification_wait_killable_pre_notification)
/* Ensure non-fatal signals after receive are blocked */
TEST(user_notification_wait_killable)
{
+ /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */
+ if (!ksft_min_kernel_version(5, 19)) {
+ SKIP(return, "Kernel version < 5.19");
+ return;
+ }
+
struct sigaction new_action = {
.sa_handler = signal_handler,
};
@@ -4772,6 +4850,12 @@ TEST(user_notification_wait_killable)
/* Ensure fatal signals after receive are not blocked */
TEST(user_notification_wait_killable_fatal)
{
+ /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */
+ if (!ksft_min_kernel_version(5, 19)) {
+ SKIP(return, "Kernel version < 5.19");
+ return;
+ }
+
struct seccomp_notif req = {};
int listener, status;
pid_t pid;
@@ -4854,6 +4938,12 @@ static void *tsync_vs_dead_thread_leader_sibling(void *_args)
*/
TEST(tsync_vs_dead_thread_leader)
{
+ /* Depends on bfafe5e (seccomp: release task filters when the task exits) */
+ if (!ksft_min_kernel_version(6, 11)) {
+ SKIP(return, "Kernel version < 6.11");
+ return;
+ }
+
int status;
pid_t pid;
long ret;
--
2.50.1.703.g449372360f-goog