From: Quan Zhou zhouquan@iscas.ac.cn
Because a code path in syscall_enter_from_user_mode can modify a0 before syscall_handler actually executes [1], the kernel currently saves a0 to orig_a0 at the entry point of do_trap_ecall_u as an original copy of a0. Once a syscall is interrupted and later resumed, the restarted syscall uses orig_a0 to continue execution.
The above rules generally apply, except for ptrace(PTRACE_SETREGSET, ...), where the kernel ignores the tracer's setting of the tracee's a0 and restarts the syscall with the tracee's orig_a0. With the current kernel implementation of ptrace, projects like CRIU/PRoot encounter issues where setting a0 through ptrace(PTRACE_SETREGSET, ...) has no effect.
Here is a suggested solution: expose orig_a0 to userspace so that ptrace can choose whether to set orig_a0 based on the actual scenario. In fact, x86/orig_eax and LoongArch/orig_a0 have adopted similar solutions.
[1] link: https://lore.kernel.org/lkml/20230403-crisping-animosity-04ed8a45c625@spud/T...
--- Changes from RFC->v1: - Rebased on Linux 6.10-rc5. - Updated the patch description. - Adjust MAX_REG_OFFSET to match the new bottom of pt_regs (Charlie). - Simplify selftest to verify if a0 can be set (Charlie). - Fix .gitignore error (Charlie).
--- RFC link: https://lore.kernel.org/all/cover.1718693532.git.zhouquan@iscas.ac.cn/
Quan Zhou (2): riscv: Expose orig_a0 in the user_regs_struct structure riscv: selftests: Add a ptrace test to verify syscall parameter modification
arch/riscv/include/asm/ptrace.h | 7 +- arch/riscv/include/uapi/asm/ptrace.h | 2 + tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/abi/.gitignore | 1 + tools/testing/selftests/riscv/abi/Makefile | 12 ++ tools/testing/selftests/riscv/abi/ptrace.c | 124 +++++++++++++++++++ 6 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/riscv/abi/.gitignore create mode 100644 tools/testing/selftests/riscv/abi/Makefile create mode 100644 tools/testing/selftests/riscv/abi/ptrace.c
base-commit: f2661062f16b2de5d7b6a5c42a9a5c96326b8454
From: Quan Zhou zhouquan@iscas.ac.cn
Expose orig_a0 to userspace to ensure that users can modify the actual value of `a0` in the traced process through the ptrace(PTRACE_SETREGSET, ...) path.
The addition of orig_a0 also requires the following adjustments: 1) Adjust the position of orig_a0 in pt_regs to ensure correct copying. 2) MAX_REG_OFFSET should match the new bottom of pt_regs.
Suggested-by: Charlie Jenkins charlie@rivosinc.com Signed-off-by: Quan Zhou zhouquan@iscas.ac.cn --- arch/riscv/include/asm/ptrace.h | 7 ++++--- arch/riscv/include/uapi/asm/ptrace.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index b5b0adcc85c1..380cf54c1f3d 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -12,6 +12,7 @@
#ifndef __ASSEMBLY__
+/* MAX_REG_OFFSET should match the bottom of pt_regs */ struct pt_regs { unsigned long epc; unsigned long ra; @@ -45,12 +46,12 @@ struct pt_regs { unsigned long t4; unsigned long t5; unsigned long t6; + /* a0 value before the syscall */ + unsigned long orig_a0; /* Supervisor/Machine CSRs */ unsigned long status; unsigned long badaddr; unsigned long cause; - /* a0 value before the syscall */ - unsigned long orig_a0; };
#define PTRACE_SYSEMU 0x1f @@ -64,7 +65,7 @@ struct pt_regs {
#define user_mode(regs) (((regs)->status & SR_PP) == 0)
-#define MAX_REG_OFFSET offsetof(struct pt_regs, orig_a0) +#define MAX_REG_OFFSET offsetof(struct pt_regs, cause)
/* Helpers for working with the instruction pointer */ static inline unsigned long instruction_pointer(struct pt_regs *regs) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index a38268b19c3d..3e37f80cb3e8 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -54,6 +54,8 @@ struct user_regs_struct { unsigned long t4; unsigned long t5; unsigned long t6; + /* a0 value before the syscall */ + unsigned long orig_a0; };
struct __riscv_f_ext_state {
On Thu, Jun 27, 2024 at 11:02:46AM +0800, zhouquan@iscas.ac.cn wrote:
From: Quan Zhou zhouquan@iscas.ac.cn
Expose orig_a0 to userspace to ensure that users can modify the actual value of `a0` in the traced process through the ptrace(PTRACE_SETREGSET, ...) path.
The addition of orig_a0 also requires the following adjustments:
- Adjust the position of orig_a0 in pt_regs to ensure correct copying.
- MAX_REG_OFFSET should match the new bottom of pt_regs.
Suggested-by: Charlie Jenkins charlie@rivosinc.com Signed-off-by: Quan Zhou zhouquan@iscas.ac.cn
arch/riscv/include/asm/ptrace.h | 7 ++++--- arch/riscv/include/uapi/asm/ptrace.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index b5b0adcc85c1..380cf54c1f3d 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ +/* MAX_REG_OFFSET should match the bottom of pt_regs */ struct pt_regs { unsigned long epc; unsigned long ra; @@ -45,12 +46,12 @@ struct pt_regs { unsigned long t4; unsigned long t5; unsigned long t6;
+ /* a0 value before the syscall */
+ unsigned long orig_a0; /* Supervisor/Machine CSRs */ unsigned long status; unsigned long badaddr; unsigned long cause;
- /* a0 value before the syscall */
- unsigned long orig_a0;
}; #define PTRACE_SYSEMU 0x1f @@ -64,7 +65,7 @@ struct pt_regs { #define user_mode(regs) (((regs)->status & SR_PP) == 0) -#define MAX_REG_OFFSET offsetof(struct pt_regs, orig_a0) +#define MAX_REG_OFFSET offsetof(struct pt_regs, cause) /* Helpers for working with the instruction pointer */ static inline unsigned long instruction_pointer(struct pt_regs *regs) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index a38268b19c3d..3e37f80cb3e8 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -54,6 +54,8 @@ struct user_regs_struct { unsigned long t4; unsigned long t5; unsigned long t6;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
}; struct __riscv_f_ext_state { -- 2.34.1
Thank you!
Reviewed-by: Charlie Jenkins charlie@rivosinc.com
From: Quan Zhou zhouquan@iscas.ac.cn
This test checks that orig_a0 allows a syscall argument to be modified, and that changing a0 does not change the syscall argument.
Suggested-by: Charlie Jenkins charlie@rivosinc.com Signed-off-by: Quan Zhou zhouquan@iscas.ac.cn --- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/abi/.gitignore | 1 + tools/testing/selftests/riscv/abi/Makefile | 12 ++ tools/testing/selftests/riscv/abi/ptrace.c | 124 +++++++++++++++++++ 4 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/riscv/abi/.gitignore create mode 100644 tools/testing/selftests/riscv/abi/Makefile create mode 100644 tools/testing/selftests/riscv/abi/ptrace.c
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 7ce03d832b64..98541dc2f164 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn +RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn abi else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/abi/.gitignore b/tools/testing/selftests/riscv/abi/.gitignore new file mode 100644 index 000000000000..d61c51358965 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/.gitignore @@ -0,0 +1 @@ +ptrace diff --git a/tools/testing/selftests/riscv/abi/Makefile b/tools/testing/selftests/riscv/abi/Makefile new file mode 100644 index 000000000000..808d48a91ad7 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_GEN_PROGS := ptrace + +include ../../lib.mk + +$(OUTPUT)/ptrace: ptrace.c + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/abi/ptrace.c b/tools/testing/selftests/riscv/abi/ptrace.c new file mode 100644 index 000000000000..f85f927cd685 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/ptrace.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <linux/elf.h> +#include <linux/unistd.h> +#include <asm/ptrace.h> + +#include "../../kselftest_harness.h" + +#define ORIG_A0_MODIFY 0x01 +#define A0_MODIFY 0x02 +#define A0_OLD 0x03 +#define A0_NEW 0x04 + +#define perr_and_exit(fmt, ...) 
\ + ({ \ + char buf[256]; \ + snprintf(buf, sizeof(buf), "%s:%d: " fmt ": %m\n", \ + __func__, __LINE__, ##__VA_ARGS__); \ + perror(buf); \ + exit(-1); \ + }) + +static inline void resume_and_wait_tracee(pid_t pid, int flag) +{ + int status; + + if (ptrace(flag, pid, 0, 0)) + perr_and_exit("failed to resume the tracee %d\n", pid); + + if (waitpid(pid, &status, 0) != pid) + perr_and_exit("failed to wait for the tracee %d\n", pid); +} + +static void ptrace_test(int opt, int *result) +{ + int status; + pid_t pid; + struct user_regs_struct regs; + struct iovec iov = { + .iov_base = &regs, + .iov_len = sizeof(regs), + }; + + pid = fork(); + if (pid == 0) { + /* Mark oneself being traced */ + long val = ptrace(PTRACE_TRACEME, 0, 0, 0); + if (val) + perr_and_exit("failed to request for tracer to trace me: %ld\n", val); + + kill(getpid(), SIGSTOP); + + /* Perform exit syscall that will be intercepted */ + exit(A0_OLD); + } + if (pid < 0) + exit(1); + + if (waitpid(pid, &status, 0) != pid) + perr_and_exit("failed to wait for the tracee %d\n", pid); + + /* Stop at the entry point of the syscall */ + resume_and_wait_tracee(pid, PTRACE_SYSCALL); + + /* Check tracee orig_a0 before the syscall */ + if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov)) + perr_and_exit("failed to get tracee registers\n"); + if (regs.orig_a0 != A0_OLD) + perr_and_exit("unexpected orig_a0: 0x%lx\n", regs.orig_a0); + + /* Modify a0/orig_a0 for the syscall */ + switch (opt) { + case A0_MODIFY: + regs.a0 = A0_NEW; + break; + case ORIG_A0_MODIFY: + regs.orig_a0 = A0_NEW; + break; + } + + if (ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov)) + perr_and_exit("failed to set tracee registers\n"); + + /* Resume the tracee */ + ptrace(PTRACE_CONT, pid, 0, 0); + if (waitpid(pid, &status, 0) != pid) + perr_and_exit("failed to wait for the tracee\n"); + + *result = WEXITSTATUS(status); +} + +TEST(ptrace_modify_a0) +{ + int result; + + ptrace_test(A0_MODIFY, &result); + + /* The modification of a0 cannot affect the 
first argument of the syscall */ + EXPECT_EQ(A0_OLD, result); +} + +TEST(ptrace_modify_orig_a0) +{ + int result; + + ptrace_test(ORIG_A0_MODIFY, &result); + + /* Only modify orig_a0 to change the first argument of the syscall */ + EXPECT_EQ(A0_NEW, result); +} + +TEST_HARNESS_MAIN
On Thu, Jun 27, 2024 at 11:02:54AM +0800, zhouquan@iscas.ac.cn wrote:
From: Quan Zhou zhouquan@iscas.ac.cn
This test checks that orig_a0 allows a syscall argument to be modified, and that changing a0 does not change the syscall argument.
Suggested-by: Charlie Jenkins charlie@rivosinc.com Signed-off-by: Quan Zhou zhouquan@iscas.ac.cn
tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/abi/.gitignore | 1 + tools/testing/selftests/riscv/abi/Makefile | 12 ++ tools/testing/selftests/riscv/abi/ptrace.c | 124 +++++++++++++++++++ 4 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/riscv/abi/.gitignore create mode 100644 tools/testing/selftests/riscv/abi/Makefile create mode 100644 tools/testing/selftests/riscv/abi/ptrace.c
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 7ce03d832b64..98541dc2f164 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn +RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn abi else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/abi/.gitignore b/tools/testing/selftests/riscv/abi/.gitignore new file mode 100644 index 000000000000..d61c51358965 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/.gitignore @@ -0,0 +1 @@ +ptrace diff --git a/tools/testing/selftests/riscv/abi/Makefile b/tools/testing/selftests/riscv/abi/Makefile new file mode 100644 index 000000000000..808d48a91ad7 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile
+CFLAGS += -I$(top_srcdir)/tools/include
+TEST_GEN_PROGS := ptrace
+include ../../lib.mk
+$(OUTPUT)/ptrace: ptrace.c
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/abi/ptrace.c b/tools/testing/selftests/riscv/abi/ptrace.c new file mode 100644 index 000000000000..f85f927cd685 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/ptrace.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <linux/elf.h> +#include <linux/unistd.h> +#include <asm/ptrace.h>
+#include "../../kselftest_harness.h"
+#define ORIG_A0_MODIFY 0x01 +#define A0_MODIFY 0x02 +#define A0_OLD 0x03 +#define A0_NEW 0x04
+#define perr_and_exit(fmt, ...) \
- ({ \
char buf[256]; \
Apologies, I missed this in the last review. In the above line there is a space in the middle of the tabs before `char buf[256];`.
snprintf(buf, sizeof(buf), "%s:%d: " fmt ": %m\n", \
__func__, __LINE__, ##__VA_ARGS__); \
perror(buf); \
exit(-1); \
- })
+static inline void resume_and_wait_tracee(pid_t pid, int flag) +{
- int status;
- if (ptrace(flag, pid, 0, 0))
perr_and_exit("failed to resume the tracee %d\n", pid);
- if (waitpid(pid, &status, 0) != pid)
perr_and_exit("failed to wait for the tracee %d\n", pid);
+}
+static void ptrace_test(int opt, int *result) +{
- int status;
- pid_t pid;
- struct user_regs_struct regs;
- struct iovec iov = {
.iov_base = &regs,
.iov_len = sizeof(regs),
- };
- pid = fork();
- if (pid == 0) {
/* Mark oneself being traced */
long val = ptrace(PTRACE_TRACEME, 0, 0, 0);
if (val)
perr_and_exit("failed to request for tracer to trace me: %ld\n", val);
kill(getpid(), SIGSTOP);
/* Perform exit syscall that will be intercepted */
exit(A0_OLD);
- }
- if (pid < 0)
exit(1);
- if (waitpid(pid, &status, 0) != pid)
perr_and_exit("failed to wait for the tracee %d\n", pid);
- /* Stop at the entry point of the syscall */
- resume_and_wait_tracee(pid, PTRACE_SYSCALL);
- /* Check tracee orig_a0 before the syscall */
- if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))
perr_and_exit("failed to get tracee registers\n");
- if (regs.orig_a0 != A0_OLD)
perr_and_exit("unexpected orig_a0: 0x%lx\n", regs.orig_a0);
- /* Modify a0/orig_a0 for the syscall */
- switch (opt) {
- case A0_MODIFY:
regs.a0 = A0_NEW;
break;
- case ORIG_A0_MODIFY:
regs.orig_a0 = A0_NEW;
break;
- }
- if (ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov))
perr_and_exit("failed to set tracee registers\n");
- /* Resume the tracee */
- ptrace(PTRACE_CONT, pid, 0, 0);
- if (waitpid(pid, &status, 0) != pid)
perr_and_exit("failed to wait for the tracee\n");
- *result = WEXITSTATUS(status);
+}
+TEST(ptrace_modify_a0) +{
- int result;
- ptrace_test(A0_MODIFY, &result);
- /* The modification of a0 cannot affect the first argument of the syscall */
- EXPECT_EQ(A0_OLD, result);
+}
+TEST(ptrace_modify_orig_a0) +{
- int result;
- ptrace_test(ORIG_A0_MODIFY, &result);
- /* Only modify orig_a0 to change the first argument of the syscall */
- EXPECT_EQ(A0_NEW, result);
+}
+TEST_HARNESS_MAIN
2.34.1
Reviewed-by: Charlie Jenkins charlie@rivosinc.com
linux-kselftest-mirror@lists.linaro.org