Since the syscall wrapper is now supported on powerpc, with syscall entry symbols named sys_*, this series makes the changes needed to fix selftests such as test_bpf_syscall_macro, attach_probe/auto, etc.
Saket Kumar Bhaskar (3): libbpf: Fix accessing the syscall argument on powerpc libbpf: Remove powerpc prefix from syscall function names selftests/bpf: Define SYS_PREFIX for powerpc
tools/lib/bpf/bpf_tracing.h | 9 +++++++-- tools/lib/bpf/libbpf.c | 10 ++++++++-- tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +++ 3 files changed, 18 insertions(+), 4 deletions(-)
Since commit 7e92e01b7245 ("powerpc: Provide syscall wrapper"), powerpc selects ARCH_HAS_SYSCALL_WRAPPER so let's use the generic implementation of PT_REGS_SYSCALL_REGS().
Also, allow overriding PT_REGS_PARM1{_CORE}_SYSCALL for powerpc as powerpc needs orig_gpr3, similar to s390 and arm64.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com --- tools/lib/bpf/bpf_tracing.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index a8f6cd4841b0..933e1dab6c8f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -287,6 +287,10 @@ struct pt_regs___arm64 { * section "Function Calling Sequence") */
+struct pt_regs___powerpc { + unsigned long orig_gpr3; +} __attribute__((preserve_access_index)); + #define __PT_PARM1_REG gpr[3] #define __PT_PARM2_REG gpr[4] #define __PT_PARM3_REG gpr[5] @@ -296,8 +300,6 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG gpr[9] #define __PT_PARM8_REG gpr[10]
-/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */ -#define PT_REGS_SYSCALL_REGS(ctx) ctx #define __PT_PARM1_SYSCALL_REG orig_gpr3 #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG @@ -307,6 +309,9 @@ struct pt_regs___arm64 { #if !defined(__arch64__) #define __PT_PARM7_SYSCALL_REG __PT_PARM7_REG /* only powerpc (not powerpc64) */ #endif +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___powerpc *)(x))->orig_gpr3) +#define PT_REGS_PARM1_CORE_SYSCALL(x) \ + BPF_CORE_READ((const struct pt_regs___powerpc *)(x), __PT_PARM1_SYSCALL_REG)
#define __PT_RET_REG regs[31] #define __PT_FP_REG __unsupported__
Commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") dropped the __powerpc_ prefix from syscall entry points. So, even though powerpc now supports the syscall wrapper, /proc/kallsyms lists the syscall entry symbols without the powerpc prefix (sys_*).
For this reason, the arch-specific prefix for syscall functions on powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com --- tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__) - return "powerpc"; -#elif defined(__powerpc64__) - return "powerpc64"; + return ""; #else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__) + snprintf(syscall_name, sizeof(syscall_name), "sys_bpf"); +#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
if (determine_kprobe_perf_type() >= 0) { int pfd; @@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__) + snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name); +#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); }
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO, since both patches, 7e92e01b7245 ("powerpc: Provide syscall wrapper") and 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points"), went into the same kernel version (v6.1-rc1), there won't be many kernel versions that have only one of these patches.
Also, to test further, I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and the test passed in this case too.
Thanks, Saket
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't me much kernel versions that has only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and it the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Thanks, Saket
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't me much kernel versions that has only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and it the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use the arch-specific prefix, as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf, which dropped the prefix, also went into the same kernel release (v6.1-rc1). So, is it worth supporting both the sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__) + snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx); +#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
if (determine_kprobe_perf_type() >= 0) { int pfd; @@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else + return pfd >= 0 ? 1 : 0; +#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) +#if defined(__powerpc__) + return 1; +#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false); +#if defined(__powerpc__) + return 0; +#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of the arch-specific prefix to identify whether the syscall wrapper is enabled, so a separate way to handle the powerpc case may not be needed. I will wait for your input before sending v2.
Thanks, Saket
Thanks, Saket
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
On Fri, Jan 10, 2025 at 2:49 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't me much kernel versions that has only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and it the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use arch-specific prefix as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf that dropped the prefix also went into the same kernel release (v6.1-rc1). So, is it worth it support both sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx);
+#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+#endif
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else
return pfd >= 0 ? 1 : 0;
+#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+#if defined(__powerpc__)
return 1;
+#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false);
+#if defined(__powerpc__)
return 0;
+#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of arch specific prefix to identify if syscall wrapper is enabled. Separate way to handle powerpc case may not be needed. Will wait for your inputs to send v2.
the problem is that __se_sys_bpf is not traceable (it's a static function), so it seems like this won't work
it's been a while, let me try to clarify my understanding of the issue. The problem is that powerpc is special in that when syscall wrapper is used, then, unlike all other architectures, they opted to not have arch-specific prefix for syscall wrappers, is that right? and that's why all the dancing you are trying to add. Am I right?
Thanks, Saket
Thanks, Saket
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
CCing Maddy and MPE On Fri, Jan 10, 2025 at 02:29:42PM -0800, Andrii Nakryiko wrote:
On Fri, Jan 10, 2025 at 2:49 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to syscall entry points") drops _powerpc prefix to syscall entry points, even though powerpc now supports syscall wrapper, so /proc/kallsyms have symbols for syscall entry without powerpc prefix(sys_*).
For this reason, arch specific prefix for syscall functions in powerpc is dropped.
Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com
tools/lib/bpf/libbpf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..3a370fa37d8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) #elif defined(__riscv) return "riscv"; #elif defined(__powerpc__)
return "powerpc";
-#elif defined(__powerpc64__)
return "powerpc64";
return "";
#else return NULL; #endif @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) if (!ksys_pfx) return 0;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "sys_bpf");
+#else snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't me much kernel versions that has only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and it the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use arch-specific prefix as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf that dropped the prefix also went into the same kernel release (v6.1-rc1). So, is it worth it support both sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx);
+#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+#endif
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else
return pfd >= 0 ? 1 : 0;
+#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+#if defined(__powerpc__)
return 1;
+#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false);
+#if defined(__powerpc__)
return 0;
+#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of arch specific prefix to identify if syscall wrapper is enabled. Separate way to handle powerpc case may not be needed. Will wait for your inputs to send v2.
the problem is that __se_sys_bpf is not traceable (it's a static function), so it seems like this won't work
it's been a while, let me try to clarify my understanding of the issue. The problem is that powerpc is special in that when syscall wrapper is used, then, unlike all other architectures, they opted to not have arch-specific prefix for syscall wrappers, is that right? and that's why all the dancing you are trying to add. Am I right?
Yes, you got it right. For more details, you can refer to the reasoning behind the change here: https://github.com/torvalds/linux/commit/94746890202cf
Thanks, Saket
Thanks, Saket
Thanks, Saket
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, * compiler does not know that we have an explicit conditional * as well. */ +#if defined(__powerpc__)
snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name);
+#else snprintf(func_name, sizeof(func_name), "__%s_sys_%s", arch_specific_syscall_pfx() ? : "", syscall_name); +#endif } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } -- 2.43.5
On Sat, Jan 11, 2025 at 11:53 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
CCing Maddy and MPE On Fri, Jan 10, 2025 at 02:29:42PM -0800, Andrii Nakryiko wrote:
On Fri, Jan 10, 2025 at 2:49 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote:
On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote: > > Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to > syscall entry points") drops _powerpc prefix to syscall entry points, > even though powerpc now supports syscall wrapper, so /proc/kallsyms > have symbols for syscall entry without powerpc prefix(sys_*). > > For this reason, arch specific prefix for syscall functions in powerpc > is dropped. > > Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com > --- > tools/lib/bpf/libbpf.c | 12 +++++++++--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > index 219facd0e66e..3a370fa37d8a 100644 > --- a/tools/lib/bpf/libbpf.c > +++ b/tools/lib/bpf/libbpf.c > @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) > #elif defined(__riscv) > return "riscv"; > #elif defined(__powerpc__) > - return "powerpc"; > -#elif defined(__powerpc64__) > - return "powerpc64"; > + return ""; > #else > return NULL; > #endif > @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) > if (!ksys_pfx) > return 0; > > +#if defined(__powerpc__) > + snprintf(syscall_name, sizeof(syscall_name), "sys_bpf"); > +#else > snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); > +#endif
The problem is that on older versions of kernel it will have this prefix, while on newer ones it won't. So to not break anything on old kernels, we'd need to do feature detection and pick whether to use prefix or not, right?
So it seems like this change needs a bit more work.
pw-bot: cr
Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't me much kernel versions that has only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and it the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use arch-specific prefix as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf that dropped the prefix also went into the same kernel release (v6.1-rc1). So, is it worth it support both sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx);
+#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+#endif
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else
return pfd >= 0 ? 1 : 0;
+#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+#if defined(__powerpc__)
return 1;
+#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false);
+#if defined(__powerpc__)
return 0;
+#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of arch specific prefix to identify if syscall wrapper is enabled. Separate way to handle powerpc case may not be needed. Will wait for your inputs to send v2.
the problem is that __se_sys_bpf is not traceable (it's a static function), so it seems like this won't work
it's been a while, let me try to clarify my understanding of the issue. The problem is that powerpc is special in that when syscall wrapper is used, then, unlike all other architectures, they opted to not have arch-specific prefix for syscall wrappers, is that right? and that's why all the dancing you are trying to add. Am I right?
Yes, you got it right. For more details, you can refer to the reasoning behind the change here: https://github.com/torvalds/linux/commit/94746890202cf
That was an unfortunate decision to deviate :(
Alright, so where are we? We can't do __se_<syscall> approach, but we need to have some reliable way to determine whether powerpc uses syscall wrapper. Can you please summarize available options for powerpc? Sorry, it's been a while, so we need to re-page in all the context.
Thanks, Saket
Thanks, Saket
Thanks, Saket
> > if (determine_kprobe_perf_type() >= 0) { > int pfd; > @@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, > * compiler does not know that we have an explicit conditional > * as well. > */ > +#if defined(__powerpc__) > + snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name); > +#else > snprintf(func_name, sizeof(func_name), "__%s_sys_%s", > arch_specific_syscall_pfx() ? : "", syscall_name); > +#endif > } else { > snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); > } > -- > 2.43.5 >
On Tue, Jan 14, 2025 at 02:40:20PM -0800, Andrii Nakryiko wrote:
On Sat, Jan 11, 2025 at 11:53 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
CCing Maddy and MPE On Fri, Jan 10, 2025 at 02:29:42PM -0800, Andrii Nakryiko wrote:
On Fri, Jan 10, 2025 at 2:49 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote: > On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote: > > > > Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to > > syscall entry points") drops _powerpc prefix to syscall entry points, > > even though powerpc now supports syscall wrapper, so /proc/kallsyms > > have symbols for syscall entry without powerpc prefix(sys_*). > > > > For this reason, arch specific prefix for syscall functions in powerpc > > is dropped. > > > > Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com > > --- > > tools/lib/bpf/libbpf.c | 12 +++++++++--- > > 1 file changed, 9 insertions(+), 3 deletions(-) > > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > > index 219facd0e66e..3a370fa37d8a 100644 > > --- a/tools/lib/bpf/libbpf.c > > +++ b/tools/lib/bpf/libbpf.c > > @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) > > #elif defined(__riscv) > > return "riscv"; > > #elif defined(__powerpc__) > > - return "powerpc"; > > -#elif defined(__powerpc64__) > > - return "powerpc64"; > > + return ""; > > #else > > return NULL; > > #endif > > @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) > > if (!ksys_pfx) > > return 0; > > > > +#if defined(__powerpc__) > > + snprintf(syscall_name, sizeof(syscall_name), "sys_bpf"); > > +#else > > snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); > > +#endif > > The problem is that on older versions of kernel it will have this > prefix, while on newer ones it won't. So to not break anything on old > kernels, we'd need to do feature detection and pick whether to use > prefix or not, right? > > So it seems like this change needs a bit more work. > > pw-bot: cr > Hi Andrii,
IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) went into the same kernel version v6.1-rc1, there won't be many kernel versions that have only one of these patches.
Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, and the test passed in this case too.
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use arch-specific prefix as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf that dropped the prefix also went into the same kernel release (v6.1-rc1). So, is it worth supporting both the sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx);
+#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+#endif
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else
return pfd >= 0 ? 1 : 0;
+#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+#if defined(__powerpc__)
return 1;
+#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false);
+#if defined(__powerpc__)
return 0;
+#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of arch specific prefix to identify if syscall wrapper is enabled. Separate way to handle powerpc case may not be needed. Will wait for your inputs to send v2.
the problem is that __se_sys_bpf is not traceable (it's a static function), so it seems like this won't work
it's been a while, let me try to clarify my understanding of the issue. The problem is that powerpc is special in that when syscall wrapper is used, then, unlike all other architectures, they opted to not have arch-specific prefix for syscall wrappers, is that right? and that's why all the dancing you are trying to add. Am I right?
Yes, you got it right. For more details, you can refer to the reasoning behind the change here: https://github.com/torvalds/linux/commit/94746890202cf
That was an unfortunate decision to deviate :(
Alright, so where are we? We can't do __se_<syscall> approach, but we need to have some reliable way to determine whether powerpc uses syscall wrapper. Can you please summarize available options for powerpc? Sorry, it's been a while, so we need to re-page in all the context.
Hi Andrii,
1. On powerpc we are able to set a kprobe on __se_sys_bpf, so we are thinking of using this to check if syscall wrapper is enabled.
Snippet from kernel where syscall wrapper wasn't there for powerpc:
# uname -r 6.0.0
# cat kprobe_events p:kprobes/p_kprobe2_user_events_osquery netlink_ack r64:kprobes/r_kprobe_user_events_osquery audit_receive p:kprobes/p_kprobe_user_events_osquery audit_receive p:kprobes/my_probe __se_sys_bpf
# cat trace # tracer: nop # # entries-in-buffer/entries-written: 20/20 #P:64 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | test_progs-1971 [034] ..... 532.732614: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.732843: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733120: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733485: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733499: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733507: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733512: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733552: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733577: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733581: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733586: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733592: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733596: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733601: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733606: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733612: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733622: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733658: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733740: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.736043: my_probe: (sys_bpf+0xc/0x40)
2. The other is sys_bpf, but this symbol exists in both cases(kernel where syscall wrapper is enabled and where it is disabled).
Kernel with syscall wrapper not introduced in powerpc:
# uname -r 6.0.0
# cat /proc/kallsyms | grep sys_bpf c000000000383630 t __sys_bpf c0000000003844a0 T bpf_sys_bpf c000000000384510 T kern_sys_bpf c000000000384840 T sys_bpf c000000000384840 T __se_sys_bpf c000000001030c80 d bpf_sys_bpf_proto c0000000014a8bf8 d __ksymtab_kern_sys_bpf c0000000014eac1f r __kstrtab_kern_sys_bpf c0000000014fa53b r __kstrtabns_kern_sys_bpf c000000002151e90 d _eil_addr_sys_bpf
Kernel with syscall wrapper introduced in powerpc:
# uname -r 6.13.0-rc6+
# cat /proc/kallsyms | grep sys_bpf c0000000003d7750 t __sys_bpf c0000000003d83ac T bpf_sys_bpf c0000000003d8418 T kern_sys_bpf c0000000003d8734 T sys_bpf c000000001243328 d bpf_sys_bpf_proto c0000000017776b0 r __ksymtab_kern_sys_bpf c0000000021b7520 d _eil_addr_sys_bpf
Thanks, Saket
Thanks, Saket
Thanks, Saket
Thanks, Saket > > > > if (determine_kprobe_perf_type() >= 0) { > > int pfd; > > @@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, > > * compiler does not know that we have an explicit conditional > > * as well. > > */ > > +#if defined(__powerpc__) > > + snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name); > > +#else > > snprintf(func_name, sizeof(func_name), "__%s_sys_%s", > > arch_specific_syscall_pfx() ? : "", syscall_name); > > +#endif > > } else { > > snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); > > } > > -- > > 2.43.5 > >
On Wed, Jan 15, 2025 at 6:16 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Tue, Jan 14, 2025 at 02:40:20PM -0800, Andrii Nakryiko wrote:
On Sat, Jan 11, 2025 at 11:53 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
CCing Maddy and MPE On Fri, Jan 10, 2025 at 02:29:42PM -0800, Andrii Nakryiko wrote:
On Fri, Jan 10, 2025 at 2:49 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote:
On Thu, Nov 21, 2024 at 04:00:13PM -0800, Andrii Nakryiko wrote:
On Wed, Nov 20, 2024 at 6:52 AM Saket Kumar Bhaskar skb99@linux.ibm.com wrote: > > On Fri, Nov 08, 2024 at 10:43:54AM -0800, Andrii Nakryiko wrote: > > On Sun, Nov 3, 2024 at 9:00 PM Saket Kumar Bhaskar skb99@linux.ibm.com wrote: > > > > > > Since commit 94746890202cf ("powerpc: Don't add __powerpc_ prefix to > > > syscall entry points") drops _powerpc prefix to syscall entry points, > > > even though powerpc now supports syscall wrapper, so /proc/kallsyms > > > have symbols for syscall entry without powerpc prefix(sys_*). > > > > > > For this reason, arch specific prefix for syscall functions in powerpc > > > is dropped. > > > > > > Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com > > > --- > > > tools/lib/bpf/libbpf.c | 12 +++++++++--- > > > 1 file changed, 9 insertions(+), 3 deletions(-) > > > > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > > > index 219facd0e66e..3a370fa37d8a 100644 > > > --- a/tools/lib/bpf/libbpf.c > > > +++ b/tools/lib/bpf/libbpf.c > > > @@ -11110,9 +11110,7 @@ static const char *arch_specific_syscall_pfx(void) > > > #elif defined(__riscv) > > > return "riscv"; > > > #elif defined(__powerpc__) > > > - return "powerpc"; > > > -#elif defined(__powerpc64__) > > > - return "powerpc64"; > > > + return ""; > > > #else > > > return NULL; > > > #endif > > > @@ -11127,7 +11125,11 @@ int probe_kern_syscall_wrapper(int token_fd) > > > if (!ksys_pfx) > > > return 0; > > > > > > +#if defined(__powerpc__) > > > + snprintf(syscall_name, sizeof(syscall_name), "sys_bpf"); > > > +#else > > > snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); > > > +#endif > > > > The problem is that on older versions of kernel it will have this > > prefix, while on newer ones it won't. So to not break anything on old > > kernels, we'd need to do feature detection and pick whether to use > > prefix or not, right? > > > > So it seems like this change needs a bit more work. 
> > > > pw-bot: cr > > > Hi Andrii, > > IMO since both the patches 7e92e01b7245(powerpc: Provide syscall wrapper) > and 94746890202cf(powerpc: Don't add __powerpc_ prefix to syscall entry points) > went into the same kernel version v6.1-rc1, there won't be many kernel > versions that have only one of these patches. > > Also, to test more I tried this patch with ARCH_HAS_SYSCALL_WRAPPER disabled, > and the test passed in this case too. >
Keep in mind that libbpf is supposed to work across many kernel versions. So as long as there are powerpc (old) kernels that do use arch-specific prefix, we need to detect them and supply prefix when attaching ksyscall programs.
Hi Andrii,
Sorry about the delayed response, I have started looking at this after a vacation.
There are unlikely to be any old kernels that use arch-specific prefix as syscall wrapper support was added to powerpc in v6.1 and commit 94746890202cf that dropped the prefix also went into the same kernel release (v6.1-rc1). So, is it worth supporting both the sys_bpf and __powerpc_sys_bpf cases?
But yes, there can be a kernel without syscall wrapper but having the sys_bpf symbol. So, how about identifying syscall wrapper enablement with __se_sys_bpf instead:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..ff69a30cfe9b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11163,11 +11163,15 @@ int probe_kern_syscall_wrapper(int token_fd) char syscall_name[64]; const char *ksys_pfx;
+#if defined(__powerpc__)
snprintf(syscall_name, sizeof(syscall_name), "__se_sys_bpf", ksys_pfx);
+#else ksys_pfx = arch_specific_syscall_pfx(); if (!ksys_pfx) return 0;
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+#endif
if (determine_kprobe_perf_type() >= 0) { int pfd;
@@ -11176,16 +11180,28 @@ int probe_kern_syscall_wrapper(int token_fd) if (pfd >= 0) close(pfd);
+#if defined(__powerpc__) return pfd >= 0 ? 1 : 0; +#else
return pfd >= 0 ? 1 : 0;
+#endif } else { /* legacy mode */ char probe_name[128];
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+#if defined(__powerpc__)
return 1;
+#else return 0; +#endif
(void)remove_kprobe_event_legacy(probe_name, false);
+#if defined(__powerpc__)
return 0;
+#else return 1; +#endif } }
Actually, all architectures could use this '__se_' prefix instead of arch specific prefix to identify if syscall wrapper is enabled. Separate way to handle powerpc case may not be needed. Will wait for your inputs to send v2.
the problem is that __se_sys_bpf is not traceable (it's a static function), so it seems like this won't work
it's been a while, let me try to clarify my understanding of the issue. The problem is that powerpc is special in that when syscall wrapper is used, then, unlike all other architectures, they opted to not have arch-specific prefix for syscall wrappers, is that right? and that's why all the dancing you are trying to add. Am I right?
Yes, you got it right. For more details, you can refer to the reasoning behind the change here: https://github.com/torvalds/linux/commit/94746890202cf
That was an unfortunate decision to deviate :(
Alright, so where are we? We can't do __se_<syscall> approach, but we need to have some reliable way to determine whether powerpc uses syscall wrapper. Can you please summarize available options for powerpc? Sorry, it's been a while, so we need to re-page in all the context.
Hi Andrii,
- On powerpc we are able to set a kprobe on __se_sys_bpf, so we are thinking of using this to check if syscall wrapper is enabled.
I'm not trying to be difficult, but what guarantees that this is always the case? I'm looking at this:
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))
in arch/powerpc/include/asm/syscall_wrapper.h
It's static, so it's up to the compiler to decide whether to inline this function or not. Once inlined, it's effectively not there.
Snippet from kernel where syscall wrapper wasn't there for powerpc:
# uname -r 6.0.0
# cat kprobe_events p:kprobes/p_kprobe2_user_events_osquery netlink_ack r64:kprobes/r_kprobe_user_events_osquery audit_receive p:kprobes/p_kprobe_user_events_osquery audit_receive p:kprobes/my_probe __se_sys_bpf
# cat trace # tracer: nop # # entries-in-buffer/entries-written: 20/20 #P:64 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | test_progs-1971 [034] ..... 532.732614: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.732843: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733120: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733485: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733499: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733507: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733512: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733552: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733577: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733581: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733586: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733592: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733596: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733601: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733606: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733612: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733622: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733658: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.733740: my_probe: (sys_bpf+0xc/0x40) test_progs-1971 [034] ..... 532.736043: my_probe: (sys_bpf+0xc/0x40)
- The other is sys_bpf, but this symbol exists in both cases(kernel where syscall wrapper is enabled and where it is disabled).
Kernel with syscall wrapper not introduced in powerpc:
# uname -r 6.0.0
# cat /proc/kallsyms | grep sys_bpf c000000000383630 t __sys_bpf c0000000003844a0 T bpf_sys_bpf c000000000384510 T kern_sys_bpf c000000000384840 T sys_bpf c000000000384840 T __se_sys_bpf c000000001030c80 d bpf_sys_bpf_proto c0000000014a8bf8 d __ksymtab_kern_sys_bpf c0000000014eac1f r __kstrtab_kern_sys_bpf c0000000014fa53b r __kstrtabns_kern_sys_bpf c000000002151e90 d _eil_addr_sys_bpf
Kernel with syscall wrapper introduced in powerpc:
# uname -r 6.13.0-rc6+
# cat /proc/kallsyms | grep sys_bpf c0000000003d7750 t __sys_bpf c0000000003d83ac T bpf_sys_bpf c0000000003d8418 T kern_sys_bpf c0000000003d8734 T sys_bpf c000000001243328 d bpf_sys_bpf_proto c0000000017776b0 r __ksymtab_kern_sys_bpf c0000000021b7520 d _eil_addr_sys_bpf
Thanks, Saket
Thanks, Saket
Thanks, Saket
> Thanks, > Saket > > > > > > if (determine_kprobe_perf_type() >= 0) { > > > int pfd; > > > @@ -11272,8 +11274,12 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, > > > * compiler does not know that we have an explicit conditional > > > * as well. > > > */ > > > +#if defined(__powerpc__) > > > + snprintf(func_name, sizeof(func_name), "sys_%s", syscall_name); > > > +#else > > > snprintf(func_name, sizeof(func_name), "__%s_sys_%s", > > > arch_specific_syscall_pfx() ? : "", syscall_name); > > > +#endif > > > } else { > > > snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); > > > } > > > -- > > > 2.43.5 > > >
SYS_PREFIX was missing for powerpc, which made a kprobe test on sys_prctl fail.
Add missing SYS_PREFIX for powerpc.
Fixes: 7e92e01b7245 ("powerpc: Provide syscall wrapper") Fixes: 94746890202c ("powerpc: Don't add __powerpc_ prefix to syscall entry points") Signed-off-by: Saket Kumar Bhaskar skb99@linux.ibm.com --- tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index eccaf955e394..ae6beb2fb480 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -160,6 +160,9 @@ #elif defined(__TARGET_ARCH_riscv) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__riscv_" +#elif defined(__TARGET_ARCH_powerpc) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "" #else #define SYSCALL_WRAPPER 0 #define SYS_PREFIX "__se_"
linux-kselftest-mirror@lists.linaro.org