On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky davemarchevsky@fb.com wrote:
This helper is meant to be "bpf_trace_printk, but with proper vararg
We have bpf_snprintf() and bpf_seq_printf() names for other BPF helpers using the same approach. How about we call this one simply `bpf_printf`? It will be in line with other naming, it is logical BPF equivalent of user-space printf (which outputs to stderr, which in BPF land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical to have a nice and short BPF_PRINTF() convenience macro provided by libbpf.
support". Follow bpf_snprintf's example and take a u64 pseudo-vararg array. Write to dmesg using the same mechanism as bpf_trace_printk.
Are you sure about the dmesg part?... bpf_trace_printk is outputting into /sys/kernel/debug/tracing/trace_pipe.
Signed-off-by: Dave Marchevsky davemarchevsky@fb.com
include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 23 +++++++++++++++ kernel/bpf/core.c | 5 ++++ kernel/bpf/helpers.c | 2 ++ kernel/trace/bpf_trace.c | 52 +++++++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 23 +++++++++++++++ 6 files changed, 105 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be8d57e6e78a..b6c45a6cbbba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f int bpf_prog_calc_tag(struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4f7892edb2b..899a2649d986 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4871,6 +4871,28 @@ union bpf_attr {
Return
Value specified by user at BPF link creation/attachment time
or 0, if it was not specified.
- u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
Description
Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
to format. Supports up to 12 arguments to print in this way.
we didn't specify 12 in the description of bpf_snprintf() or bpf_seq_printf(), so why start doing that here? For data/args format, let's just refer to bpf_snprintf() or bpf_seq_printf(), whichever does a better job explaining this :)
The *fmt* and *fmt_size* are for the format string itself. The *data* and
*data_len* are format string arguments.
Each format specifier in **fmt** corresponds to one u64 element
in the **data** array. For strings and pointers where pointees
are accessed, only the pointer values are stored in the *data*
array. The *data_len* is the size of *data* in bytes.
Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
Reading kernel memory may fail due to either invalid address or
valid address but requiring a major memory fault. If reading kernel memory
fails, the string for **%s** will be an empty string, and the ip
address for **%p{i,I}{4,6}** will be 0. Not returning error to
bpf program is consistent with what **bpf_trace_printk**\ () does for now.
This is just a copy/paste from other helpers. Let's avoid duplication and just point people to a description in other helpers.
Return
The number of bytes written to the buffer, or a negative error
*/
in case of failure.
#define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5048,6 +5070,7 @@ union bpf_attr { FN(timer_cancel), \ FN(get_func_ip), \ FN(get_attach_cookie), \
FN(trace_vprintk), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 91f24c7b38a1..a137c550046c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2357,6 +2357,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) return NULL; }
+const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void) +{
return NULL;
+}
u64 __weak bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5ce19b376ef7..863e5ee68558 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1419,6 +1419,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_btf_proto; case BPF_FUNC_snprintf: return &bpf_snprintf_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto(); default: return NULL; }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2cf4bfa1ab7b..8b3f1ec9e082 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_SIZE, };
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +static __always_inline void __set_printk_clr_event(void)
Please drop __always_inline, we only use __always_inline for absolutely performance critical routines. Let the compiler decide.
{ /* * This program might be calling bpf_trace_printk, @@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) */ if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) pr_warn_ratelimited("could not enable bpf_trace_printk events"); +}
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +{
__set_printk_clr_event(); return &bpf_trace_printk_proto;
}
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
u32, data_len)
+{
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
int ret, num_args;
u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
if (ret < 0)
return ret;
raw_spin_lock_irqsave(&trace_printk_lock, flags);
ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
bpf_bprintf_cleanup();
return ret;
+}
+static const struct bpf_func_proto bpf_trace_vprintk_proto = {
.func = bpf_trace_vprintk,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_MEM_OR_NULL,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
+};
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) +{
__set_printk_clr_event();
return &bpf_trace_vprintk_proto;
+}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, const void *, data, u32, data_len) { @@ -1113,6 +1161,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_snprintf_proto; case BPF_FUNC_get_func_ip: return &bpf_get_func_ip_proto_tracing;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto(); default: return bpf_base_func_proto(func_id); }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c4f7892edb2b..899a2649d986 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4871,6 +4871,28 @@ union bpf_attr {
Return
Value specified by user at BPF link creation/attachment time
or 0, if it was not specified.
- u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
Description
Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
to format. Supports up to 12 arguments to print in this way.
The *fmt* and *fmt_size* are for the format string itself. The *data* and
*data_len* are format string arguments.
Each format specifier in **fmt** corresponds to one u64 element
in the **data** array. For strings and pointers where pointees
are accessed, only the pointer values are stored in the *data*
array. The *data_len* is the size of *data* in bytes.
Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
Reading kernel memory may fail due to either invalid address or
valid address but requiring a major memory fault. If reading kernel memory
fails, the string for **%s** will be an empty string, and the ip
address for **%p{i,I}{4,6}** will be 0. Not returning error to
bpf program is consistent with what **bpf_trace_printk**\ () does for now.
Return
The number of bytes written to the buffer, or a negative error
*/
in case of failure.
#define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5048,6 +5070,7 @@ union bpf_attr { FN(timer_cancel), \ FN(get_func_ip), \ FN(get_attach_cookie), \
FN(trace_vprintk), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2.30.2