PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of system calls the tracee is blocked in.
This API allows ptracers to obtain and modify system call details in a straightforward and architecture-agnostic way.
Current implementation supports changing only those bits of system call information that are used by strace, namely, syscall number, syscall arguments, and syscall return value.
Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO, such as instruction pointer and stack pointer, could be added later if needed, by re-using struct ptrace_syscall_info.reserved to specify the additional details that should be set. Currently, the reserved field of struct ptrace_syscall_info must be initialized with zeroes; arch, instruction_pointer, and stack_pointer fields are ignored.
PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY, PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations. Other operations could be added later if needed.
Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure to provide an API of changing the first system call argument on riscv architecture [1].
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); ... PTRACE_SET_SYSCALL_INFO Modify information about the system call that caused the stop. The "data" argument is a pointer to struct ptrace_syscall_info that specifies the system call information to be set. The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
[1] https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Dmitry V. Levin (6): Revert "arch: remove unused function syscall_set_arguments()" syscall.h: add syscall_set_arguments() on remaining HAVE_ARCH_TRACEHOOK arches syscall.h: introduce syscall_set_nr() ptrace_get_syscall_info: factor out ptrace_get_syscall_info_op ptrace: introduce PTRACE_SET_SYSCALL_INFO request selftests/ptrace: add a test case for PTRACE_SET_SYSCALL_INFO
arch/arc/include/asm/syscall.h | 20 + arch/arm/include/asm/syscall.h | 25 + arch/arm64/include/asm/syscall.h | 20 + arch/csky/include/asm/syscall.h | 13 + arch/hexagon/include/asm/syscall.h | 14 + arch/loongarch/include/asm/syscall.h | 15 + arch/m68k/include/asm/syscall.h | 7 + arch/microblaze/include/asm/syscall.h | 7 + arch/mips/include/asm/syscall.h | 53 +++ arch/nios2/include/asm/syscall.h | 16 + arch/openrisc/include/asm/syscall.h | 13 + arch/parisc/include/asm/syscall.h | 19 + arch/powerpc/include/asm/syscall.h | 15 + arch/riscv/include/asm/syscall.h | 16 + arch/s390/include/asm/syscall.h | 19 + arch/sh/include/asm/syscall_32.h | 19 + arch/sparc/include/asm/syscall.h | 17 + arch/um/include/asm/syscall-generic.h | 19 + arch/x86/include/asm/syscall.h | 43 ++ arch/xtensa/include/asm/syscall.h | 18 + include/asm-generic/syscall.h | 30 ++ include/linux/ptrace.h | 3 + include/uapi/linux/ptrace.h | 3 +- kernel/ptrace.c | 154 ++++++- tools/testing/selftests/ptrace/Makefile | 2 +- .../selftests/ptrace/set_syscall_info.c | 436 ++++++++++++++++++ 26 files changed, 994 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/ptrace/set_syscall_info.c
Check whether PTRACE_SET_SYSCALL_INFO semantics implemented in the kernel matches userspace expectations.
Signed-off-by: Dmitry V. Levin ldv@strace.io --- tools/testing/selftests/ptrace/Makefile | 2 +- .../selftests/ptrace/set_syscall_info.c | 436 ++++++++++++++++++ 2 files changed, 437 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/ptrace/set_syscall_info.c
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 1c631740a730..c5e0b76ba6ac 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
-TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud +TEST_GEN_PROGS := get_syscall_info set_syscall_info peeksiginfo vmaccess get_set_sud
include ../lib.mk diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c new file mode 100644 index 000000000000..f43294e1248e --- /dev/null +++ b/tools/testing/selftests/ptrace/set_syscall_info.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2018-2025 Dmitry V. Levin ldv@strace.io + * All rights reserved. + * + * Check whether PTRACE_SET_SYSCALL_INFO semantics implemented in the kernel + * matches userspace expectations. + */ + +#include "../kselftest_harness.h" +#include <err.h> +#include <fcntl.h> +#include <signal.h> +#include <asm/unistd.h> +#include "linux/ptrace.h" + +static int +kill_tracee(pid_t pid) +{ + if (!pid) + return 0; + + int saved_errno = errno; + + int rc = kill(pid, SIGKILL); + + errno = saved_errno; + return rc; +} + +static long +sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, addr, data); +} + +#define LOG_KILL_TRACEE(fmt, ...) \ + do { \ + kill_tracee(pid); \ + TH_LOG("wait #%d: " fmt, \ + ptrace_stop, ##__VA_ARGS__); \ + } while (0) + +struct si_entry { + int nr; + unsigned long args[6]; +}; +struct si_exit { + unsigned int is_error; + int rval; +}; + +TEST(set_syscall_info) +{ + const pid_t tracer_pid = getpid(); + const unsigned long dummy[] = { + 0xbad0fed0, + 0xbad1fed1, + 0xbad2fed2, + 0xbad3fed3, + 0xbad4fed4, + 0xbad5fed5, + }; + int splice_in[2], splice_out[2]; + + ASSERT_EQ(0, pipe(splice_in)); + ASSERT_EQ(0, pipe(splice_out)); + ASSERT_EQ(sizeof(dummy), write(splice_in[1], dummy, sizeof(dummy))); + + const struct { + struct si_entry entry[2]; + struct si_exit exit[2]; + } si[] = { + /* change scno, keep non-error rval */ + { + { + { + __NR_gettid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 0, tracer_pid } + } + }, + + /* set scno to -1, keep error rval */ + { + { + { + __NR_chdir, + { + (unsigned long) ".", dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + -1, + { + (unsigned long) ".", dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 1, -ENOSYS }, { 1, -ENOSYS } + } + }, + + /* keep scno, change non-error rval */ + { + { + { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 0, tracer_pid + 1 } + } + }, + + /* change arg1, keep non-error rval */ + { + { + { + __NR_chdir, + { + (unsigned long) "", dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_chdir, + { + (unsigned long) ".", dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, 0 }, { 0, 0 } + } + }, + + /* set scno to -1, change error rval to non-error */ + { + { + { + __NR_gettid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + -1, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 1, -ENOSYS }, { 0, tracer_pid } + } + }, + + /* change scno, change non-error rval to error */ + { + { + { + __NR_chdir, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 1, -EISDIR } + } + }, + + /* change scno and all args, change non-error rval */ + { + { + { + __NR_gettid, + { + splice_in[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_splice, + { + splice_in[0], 0, splice_out[1], 0, + sizeof(dummy), SPLICE_F_NONBLOCK + } + } + }, { + { 0, sizeof(dummy) }, { 0, sizeof(dummy) + 1 } + } + }, + + /* change arg1, no exit stop */ + { + { + { + __NR_exit_group, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_exit_group, + { + 0, dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, 0 }, { 0, 0 } + } + }, + }; + + long rc; + unsigned int i; + unsigned int ptrace_stop; + + pid_t pid = fork(); + + ASSERT_LE(0, pid) { + TH_LOG("fork: %m"); + } + + if (pid == 0) { + /* get the pid before PTRACE_TRACEME */ + pid = getpid(); + ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) { + TH_LOG("PTRACE_TRACEME: %m"); + } + ASSERT_EQ(0, kill(pid, SIGSTOP)) { + /* cannot happen */ + TH_LOG("kill SIGSTOP: %m"); + } + for (i = 0; i < ARRAY_SIZE(si); ++i) { + rc = syscall(si[i].entry[0].nr, + si[i].entry[0].args[0], + si[i].entry[0].args[1], + si[i].entry[0].args[2], + si[i].entry[0].args[3], + si[i].entry[0].args[4], + si[i].entry[0].args[5]); + if (si[i].exit[1].is_error) { + if (rc != -1 || errno != -si[i].exit[1].rval) + break; + } else { + if (rc != si[i].exit[1].rval) + break; + } + } + /* + * Something went wrong, but in this state tracee + * cannot reliably issue syscalls, so just crash. + */ + *(volatile unsigned char *) (unsigned long) i = 42; + /* unreachable */ + _exit(i + 1); + } + + for (ptrace_stop = 0; ; ++ptrace_stop) { + struct ptrace_syscall_info info = { + .op = 0xff /* invalid PTRACE_SYSCALL_INFO_* op */ + }; + const size_t size = sizeof(info); + const int expected_entry_size = + (void *) &info.entry.args[6] - (void *) &info; + const int expected_exit_size = + (void *) (&info.exit.is_error + 1) - + (void *) &info; + int status; + + ASSERT_EQ(pid, wait(&status)) { + /* cannot happen */ + LOG_KILL_TRACEE("wait: %m"); + } + if (WIFEXITED(status)) { + pid = 0; /* the tracee is no more */ + ASSERT_EQ(0, WEXITSTATUS(status)) { + LOG_KILL_TRACEE("unexpected exit status %u", + WEXITSTATUS(status)); + } + break; + } + ASSERT_FALSE(WIFSIGNALED(status)) { + pid = 0; /* the tracee is no more */ + LOG_KILL_TRACEE("unexpected signal %u", + WTERMSIG(status)); + } + ASSERT_TRUE(WIFSTOPPED(status)) { + /* cannot happen */ + LOG_KILL_TRACEE("unexpected wait status %#x", status); + } + + ASSERT_LT(ptrace_stop, ARRAY_SIZE(si) * 2) { + LOG_KILL_TRACEE("ptrace stop overflow"); + } + + switch (WSTOPSIG(status)) { + case SIGSTOP: + ASSERT_EQ(0, ptrace_stop) { + LOG_KILL_TRACEE("unexpected signal stop"); + } + ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, pid, 0, + PTRACE_O_TRACESYSGOOD)) { + LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m"); + } + break; + + case SIGTRAP | 0x80: + ASSERT_LT(0, ptrace_stop) { + LOG_KILL_TRACEE("unexpected syscall stop"); + } + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + pid, size, + (unsigned long) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m"); + } + if (ptrace_stop & 1) { + /* entering syscall */ + const struct si_entry *exp_entry = + &si[ptrace_stop / 2].entry[0]; + const struct si_entry *set_entry = + &si[ptrace_stop / 2].entry[1]; + + ASSERT_EQ(expected_entry_size, rc) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info.op) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.arch) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.instruction_pointer) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_TRUE(info.stack_pointer) { + LOG_KILL_TRACEE("entry stop mismatch"); + } + ASSERT_EQ(exp_entry->nr, info.entry.nr) { + LOG_KILL_TRACEE("syscall nr mismatch"); + } + for (i = 0; i < ARRAY_SIZE(exp_entry->args); ++i) { + ASSERT_EQ(exp_entry->args[i], info.entry.args[i]) { + LOG_KILL_TRACEE("syscall arg #%u mismatch", i); + } + } + info.entry.nr = set_entry->nr; + for (i = 0; i < ARRAY_SIZE(set_entry->args); ++i) + info.entry.args[i] = set_entry->args[i]; + ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, + pid, size, + (unsigned long) &info)) { + LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); + } + } else { + /* exiting syscall */ + const struct si_exit *exp_exit = + &si[ptrace_stop / 2 - 1].exit[0]; + const struct si_exit *set_exit = + &si[ptrace_stop / 2 - 1].exit[1]; + + ASSERT_EQ(expected_exit_size, rc) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info.op) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.arch) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.instruction_pointer) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_TRUE(info.stack_pointer) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(exp_exit->is_error, info.exit.is_error) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + ASSERT_EQ(exp_exit->rval, info.exit.rval) { + LOG_KILL_TRACEE("exit stop mismatch"); + } + info.exit.is_error = set_exit->is_error; + info.exit.rval = set_exit->rval; + ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, + pid, size, + (unsigned long) &info)) { + LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); + } + } + break; + + default: + LOG_KILL_TRACEE("unexpected stop signal %u", + WSTOPSIG(status)); + abort(); + } + + ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, pid, 0, 0)) { + LOG_KILL_TRACEE("PTRACE_SYSCALL: %m"); + } + } + + ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2); +} + +TEST_HARNESS_MAIN
This would seem like a very good idea. However, it is perhaps important to realize that it doesn't fully eliminate the problems with 64-bit arguments on 32-bit ABIs being handled differently (never mind inconsistencies in system call ABIs etc.) There isn't all that much that can be done about that directly, though.
-hpa
On 1/7/25 15:01, Dmitry V. Levin wrote:
PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of system calls the tracee is blocked in.
This API allows ptracers to obtain and modify system call details in a straightforward and architecture-agnostic way.
Current implementation supports changing only those bits of system call information that are used by strace, namely, syscall number, syscall arguments, and syscall return value.
Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO, such as instruction pointer and stack pointer, could be added later if needed, by re-using struct ptrace_syscall_info.reserved to specify the additional details that should be set. Currently, the reserved field of struct ptrace_syscall_info must be initialized with zeroes; arch, instruction_pointer, and stack_pointer fields are ignored.
PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY, PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations. Other operations could be added later if needed.
Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure to provide an API of changing the first system call argument on riscv architecture [1].
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); ... PTRACE_SET_SYSCALL_INFO Modify information about the system call that caused the stop. The "data" argument is a pointer to struct ptrace_syscall_info that specifies the system call information to be set. The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
[1] https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Dmitry V. Levin (6): Revert "arch: remove unused function syscall_set_arguments()" syscall.h: add syscall_set_arguments() on remaining HAVE_ARCH_TRACEHOOK arches syscall.h: introduce syscall_set_nr() ptrace_get_syscall_info: factor out ptrace_get_syscall_info_op ptrace: introduce PTRACE_SET_SYSCALL_INFO request selftests/ptrace: add a test case for PTRACE_SET_SYSCALL_INFO
arch/arc/include/asm/syscall.h | 20 + arch/arm/include/asm/syscall.h | 25 + arch/arm64/include/asm/syscall.h | 20 + arch/csky/include/asm/syscall.h | 13 + arch/hexagon/include/asm/syscall.h | 14 + arch/loongarch/include/asm/syscall.h | 15 + arch/m68k/include/asm/syscall.h | 7 + arch/microblaze/include/asm/syscall.h | 7 + arch/mips/include/asm/syscall.h | 53 +++ arch/nios2/include/asm/syscall.h | 16 + arch/openrisc/include/asm/syscall.h | 13 + arch/parisc/include/asm/syscall.h | 19 + arch/powerpc/include/asm/syscall.h | 15 + arch/riscv/include/asm/syscall.h | 16 + arch/s390/include/asm/syscall.h | 19 + arch/sh/include/asm/syscall_32.h | 19 + arch/sparc/include/asm/syscall.h | 17 + arch/um/include/asm/syscall-generic.h | 19 + arch/x86/include/asm/syscall.h | 43 ++ arch/xtensa/include/asm/syscall.h | 18 + include/asm-generic/syscall.h | 30 ++ include/linux/ptrace.h | 3 + include/uapi/linux/ptrace.h | 3 +- kernel/ptrace.c | 154 ++++++- tools/testing/selftests/ptrace/Makefile | 2 +- .../selftests/ptrace/set_syscall_info.c | 436 ++++++++++++++++++ 26 files changed, 994 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/ptrace/set_syscall_info.c
linux-kselftest-mirror@lists.linaro.org