A helper is added to support tracing kernel type information in BPF using the BPF Type Format (BTF). Its signature is
long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags);
struct btf_ptr * specifies
- a pointer to the data to be traced; - the BTF id of the type of data pointed to; or - a string representation of the type of data pointed to - a flags field is provided for future use; these flags are not to be confused with the BTF_TRACE_F_* flags below that control how the btf_ptr is displayed; the flags member of the struct btf_ptr may be used to disambiguate types in kernel versus module BTF, etc; the main distinction is the flags relate to the type and information needed in identifying it; not how it is displayed.
The helper also specifies a trace id which is set for the bpf_trace_printk tracepoint; this allows BPF programs to filter on specific trace ids, ensuring output does not become mixed between different traced events and hard to read.
For example a BPF program with a struct sk_buff *skb could do the following:
static const char *skb_type = "struct sk_buff"; static struct btf_ptr b = { };
b.ptr = skb; b.type = skb_type; bpf_trace_btf(&b, sizeof(b), 0, 0);
Default output in the trace_pipe looks like this:
<idle>-0 [023] d.s. 1825.778400: bpf_trace_printk: (struct sk_buff){ <idle>-0 [023] d.s. 1825.778409: bpf_trace_printk: (union){ <idle>-0 [023] d.s. 1825.778410: bpf_trace_printk: (struct){ <idle>-0 [023] d.s. 1825.778412: bpf_trace_printk: .prev = (struct sk_buff *)0x00000000b2a3df7e, <idle>-0 [023] d.s. 1825.778413: bpf_trace_printk: (union){ <idle>-0 [023] d.s. 1825.778414: bpf_trace_printk: .dev = (struct net_device *)0x000000001658808b, <idle>-0 [023] d.s. 1825.778416: bpf_trace_printk: .dev_scratch = (long unsigned int)18446628460391432192, <idle>-0 [023] d.s. 1825.778417: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778417: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778418: bpf_trace_printk: .rbnode = (struct rb_node){ <idle>-0 [023] d.s. 1825.778419: bpf_trace_printk: .rb_right = (struct rb_node *)0x00000000b2a3df7e, <idle>-0 [023] d.s. 1825.778420: bpf_trace_printk: .rb_left = (struct rb_node *)0x000000001658808b, <idle>-0 [023] d.s. 1825.778420: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778421: bpf_trace_printk: .list = (struct list_head){ <idle>-0 [023] d.s. 1825.778422: bpf_trace_printk: .prev = (struct list_head *)0x00000000b2a3df7e, <idle>-0 [023] d.s. 1825.778422: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778422: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778426: bpf_trace_printk: .len = (unsigned int)168, <idle>-0 [023] d.s. 1825.778427: bpf_trace_printk: .mac_len = (__u16)14, <idle>-0 [023] d.s. 1825.778428: bpf_trace_printk: .queue_mapping = (__u16)17, <idle>-0 [023] d.s. 1825.778430: bpf_trace_printk: .head_frag = (__u8)0x1, <idle>-0 [023] d.s. 1825.778431: bpf_trace_printk: .ip_summed = (__u8)0x1, <idle>-0 [023] d.s. 1825.778432: bpf_trace_printk: .l4_hash = (__u8)0x1, <idle>-0 [023] d.s. 1825.778433: bpf_trace_printk: .hash = (__u32)1873247608, <idle>-0 [023] d.s. 1825.778434: bpf_trace_printk: (union){ <idle>-0 [023] d.s. 1825.778435: bpf_trace_printk: .napi_id = (unsigned int)8209, <idle>-0 [023] d.s. 1825.778436: bpf_trace_printk: .sender_cpu = (unsigned int)8209, <idle>-0 [023] d.s. 1825.778436: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778437: bpf_trace_printk: .protocol = (__be16)8, <idle>-0 [023] d.s. 1825.778438: bpf_trace_printk: .transport_header = (__u16)226, <idle>-0 [023] d.s. 1825.778439: bpf_trace_printk: .network_header = (__u16)206, <idle>-0 [023] d.s. 1825.778440: bpf_trace_printk: .mac_header = (__u16)192, <idle>-0 [023] d.s. 1825.778440: bpf_trace_printk: .tail = (sk_buff_data_t)374, <idle>-0 [023] d.s. 1825.778441: bpf_trace_printk: .end = (sk_buff_data_t)1728, <idle>-0 [023] d.s. 1825.778442: bpf_trace_printk: .head = (unsigned char *)0x000000009798cb6b, <idle>-0 [023] d.s. 1825.778443: bpf_trace_printk: .data = (unsigned char *)0x0000000064823282, <idle>-0 [023] d.s. 1825.778444: bpf_trace_printk: .truesize = (unsigned int)2304, <idle>-0 [023] d.s. 1825.778445: bpf_trace_printk: .users = (refcount_t){ <idle>-0 [023] d.s. 1825.778445: bpf_trace_printk: .refs = (atomic_t){ <idle>-0 [023] d.s. 1825.778447: bpf_trace_printk: .counter = (int)1, <idle>-0 [023] d.s. 1825.778447: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778447: bpf_trace_printk: }, <idle>-0 [023] d.s. 1825.778448: bpf_trace_printk: }
Flags modifying display are as follows:
- BTF_TRACE_F_COMPACT: no formatting around type information - BTF_TRACE_F_NONAME: no struct/union member names/types - BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values; equivalent to %px. - BTF_TRACE_F_ZERO: show zero-valued struct/union members; they are not displayed by default
Signed-off-by: Alan Maguire alan.maguire@oracle.com --- include/linux/bpf.h | 1 + include/linux/btf.h | 9 ++-- include/uapi/linux/bpf.h | 63 +++++++++++++++++++++++++ kernel/bpf/core.c | 5 ++ kernel/bpf/helpers.c | 4 ++ kernel/trace/bpf_trace.c | 102 ++++++++++++++++++++++++++++++++++++++++- scripts/bpf_helpers_doc.py | 2 + tools/include/uapi/linux/bpf.h | 63 +++++++++++++++++++++++++ 8 files changed, 243 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6143b6e..f67819d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -934,6 +934,7 @@ struct bpf_event_entry { const char *kernel_type_name(u32 btf_type_id);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_trace_btf_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); diff --git a/include/linux/btf.h b/include/linux/btf.h index 46bf9f4..3d31e28 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,7 @@
#include <linux/types.h> #include <uapi/linux/btf.h> +#include <uapi/linux/bpf.h>
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
@@ -61,10 +62,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, * - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read * data before displaying it. */ -#define BTF_SHOW_COMPACT (1ULL << 0) -#define BTF_SHOW_NONAME (1ULL << 1) -#define BTF_SHOW_PTR_RAW (1ULL << 2) -#define BTF_SHOW_ZERO (1ULL << 3) +#define BTF_SHOW_COMPACT BTF_TRACE_F_COMPACT +#define BTF_SHOW_NONAME BTF_TRACE_F_NONAME +#define BTF_SHOW_PTR_RAW BTF_TRACE_F_PTR_RAW +#define BTF_SHOW_ZERO BTF_TRACE_F_ZERO #define BTF_SHOW_NONEWLINE (1ULL << 32) #define BTF_SHOW_UNSAFE (1ULL << 33)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e67..726fee4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3394,6 +3394,36 @@ struct bpf_stack_build_id { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags) + * Description + * Utilize BTF to trace a representation of *ptr*->ptr, using + * *ptr*->type name or *ptr*->type_id. *ptr*->type_name + * should specify the type *ptr*->ptr points to. Traversing that + * data structure using BTF, the type information and values are + * bpf_trace_printk()ed. Safe copy of the pointer data is + * carried out to avoid kernel crashes during data display. + * Tracing specifies *trace_id* as the id associated with the + * trace event; this can be used to filter trace events + * to show a subset of all traced output, helping to avoid + * the situation where BTF output is intermixed with other + * output. + * + * *flags* is a combination of + * + * **BTF_TRACE_F_COMPACT** + * no formatting around type information + * **BTF_TRACE_F_NONAME** + * no struct/union member names/types + * **BTF_TRACE_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_TRACE_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes traced, or a negative error in cases of + * failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3538,6 +3568,7 @@ struct bpf_stack_build_id { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(trace_btf), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4446,4 +4477,36 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ };
+/* + * struct btf_ptr is used for typed pointer display; the + * additional type string/BTF type id are used to render the pointer + * data as the appropriate type via the bpf_trace_btf() helper + * above. A flags field - potentially to specify additional details + * about the BTF pointer (rather than its mode of display) - is + * present for future use. Display flags - BTF_TRACE_F_* - are + * passed to display functions separately. + */ +struct btf_ptr { + void *ptr; + const char *type; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_trace_btf() behaviour. + * - BTF_TRACE_F_COMPACT: no formatting around type information + * - BTF_TRACE_F_NONAME: no struct/union member names/types + * - BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_TRACE_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_TRACE_F_COMPACT = (1ULL << 0), + BTF_TRACE_F_NONAME = (1ULL << 1), + BTF_TRACE_F_PTR_RAW = (1ULL << 2), + BTF_TRACE_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bde9334..82b3a98 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2214,6 +2214,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) return NULL; }
+const struct bpf_func_proto * __weak bpf_get_trace_btf_proto(void) +{ + return NULL; +} + u64 __weak bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index be43ab3..b9a842b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -661,6 +661,10 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, if (!perfmon_capable()) return NULL; return bpf_get_trace_printk_proto(); + case BPF_FUNC_trace_btf: + if (!perfmon_capable()) + return NULL; + return bpf_get_trace_btf_proto(); case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; default: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6453a75..92212a1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,8 +14,12 @@ #include <linux/spinlock.h> #include <linux/syscalls.h> #include <linux/error-injection.h> +#include <linux/btf.h> #include <linux/btf_ids.h>
+#include <uapi/linux/bpf.h> +#include <uapi/linux/btf.h> + #include <asm/tlb.h>
#include "trace_probe.h" @@ -555,10 +559,91 @@ static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) .arg2_type = ARG_CONST_SIZE, };
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +#define BTF_TRACE_F_ALL (BTF_TRACE_F_COMPACT | BTF_TRACE_F_NONAME | \ + BTF_TRACE_F_PTR_RAW | BTF_TRACE_F_ZERO) + +BPF_CALL_4(bpf_trace_btf, struct btf_ptr *, ptr, u32, btf_ptr_size, + u32, trace_id, u64, flags) +{ + u8 btf_kind = BTF_KIND_TYPEDEF; + char type_name[KSYM_NAME_LEN]; + const struct btf_type *t; + const struct btf *btf; + const char *btf_type; + s32 btf_id; + int ret; + + if (unlikely(flags & ~(BTF_TRACE_F_ALL))) + return -EINVAL; + + if (btf_ptr_size != sizeof(struct btf_ptr)) + return -EINVAL; + + btf = bpf_get_btf_vmlinux(); + + if (IS_ERR_OR_NULL(btf)) + return PTR_ERR(btf); + + if (ptr->type != NULL) { + ret = copy_from_kernel_nofault(type_name, ptr->type, + sizeof(type_name)); + if (ret) + return ret; + + btf_type = type_name; + + if (strncmp(btf_type, "struct ", strlen("struct ")) == 0) { + btf_kind = BTF_KIND_STRUCT; + btf_type += strlen("struct "); + } else if (strncmp(btf_type, "union ", strlen("union ")) == 0) { + btf_kind = BTF_KIND_UNION; + btf_type += strlen("union "); + } else if (strncmp(btf_type, "enum ", strlen("enum ")) == 0) { + btf_kind = BTF_KIND_ENUM; + btf_type += strlen("enum "); + } + + if (strlen(btf_type) == 0) + return -EINVAL; + + /* + * Assume type specified is a typedef as there's not much + * benefit in specifying int types other than wasting time + * on BTF lookups; we optimize for the most useful path. + * + * Fall back to BTF_KIND_INT if this fails. + */ + btf_id = btf_find_by_name_kind(btf, btf_type, btf_kind); + if (btf_id < 0) + btf_id = btf_find_by_name_kind(btf, btf_type, + BTF_KIND_INT); + } else if (ptr->type_id > 0) + btf_id = ptr->type_id; + else + return -EINVAL; + + if (btf_id > 0) + t = btf_type_by_id(btf, btf_id); + if (btf_id <= 0 || !t) + return -ENOENT; + + return btf_type_trace_show(btf, btf_id, ptr->ptr, trace_id, flags); +} + +static const struct bpf_func_proto bpf_trace_btf_proto = { + .func = bpf_trace_btf, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +static void bpf_trace_printk_enable(void) { /* - * This program might be calling bpf_trace_printk, + * This program might be calling bpf_trace_[printk|btf], * so enable the associated bpf_trace/bpf_trace_printk event. * Repeat this each time as it is possible a user has * disabled bpf_trace_printk events. By loading a program @@ -567,10 +652,21 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) */ if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) pr_warn_ratelimited("could not enable bpf_trace_printk events"); +} +const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +{ + bpf_trace_printk_enable();
return &bpf_trace_printk_proto; }
+const struct bpf_func_proto *bpf_get_trace_btf_proto(void) +{ + bpf_trace_printk_enable(); + + return &bpf_trace_btf_proto; +} + #define MAX_SEQ_PRINTF_VARARGS 12 #define MAX_SEQ_PRINTF_MAX_MEMCPY 6 #define MAX_SEQ_PRINTF_STR_LEN 128 @@ -1139,6 +1235,8 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) return &bpf_get_current_comm_proto; case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_trace_btf: + return bpf_get_trace_btf_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; case BPF_FUNC_get_numa_node_id: diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 5bfa448..7c7384b 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -432,6 +432,7 @@ class PrinterHelpers(Printer): 'struct __sk_buff', 'struct sk_msg_md', 'struct xdp_md', + 'struct btf_ptr', ] known_types = { '...', @@ -472,6 +473,7 @@ class PrinterHelpers(Printer): 'struct tcp_request_sock', 'struct udp6_sock', 'struct task_struct', + 'struct btf_ptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e67..726fee4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3394,6 +3394,36 @@ struct bpf_stack_build_id { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags) + * Description + * Utilize BTF to trace a representation of *ptr*->ptr, using + * *ptr*->type name or *ptr*->type_id. *ptr*->type_name + * should specify the type *ptr*->ptr points to. Traversing that + * data structure using BTF, the type information and values are + * bpf_trace_printk()ed. Safe copy of the pointer data is + * carried out to avoid kernel crashes during data display. + * Tracing specifies *trace_id* as the id associated with the + * trace event; this can be used to filter trace events + * to show a subset of all traced output, helping to avoid + * the situation where BTF output is intermixed with other + * output. + * + * *flags* is a combination of + * + * **BTF_TRACE_F_COMPACT** + * no formatting around type information + * **BTF_TRACE_F_NONAME** + * no struct/union member names/types + * **BTF_TRACE_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_TRACE_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes traced, or a negative error in cases of + * failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3538,6 +3568,7 @@ struct bpf_stack_build_id { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(trace_btf), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4446,4 +4477,36 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ };
+/* + * struct btf_ptr is used for typed pointer display; the + * additional type string/BTF type id are used to render the pointer + * data as the appropriate type via the bpf_trace_btf() helper + * above. A flags field - potentially to specify additional details + * about the BTF pointer (rather than its mode of display) - is + * present for future use. Display flags - BTF_TRACE_F_* - are + * passed to display functions separately. + */ +struct btf_ptr { + void *ptr; + const char *type; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_trace_btf() behaviour. + * - BTF_TRACE_F_COMPACT: no formatting around type information + * - BTF_TRACE_F_NONAME: no struct/union member names/types + * - BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_TRACE_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_TRACE_F_COMPACT = (1ULL << 0), + BTF_TRACE_F_NONAME = (1ULL << 1), + BTF_TRACE_F_PTR_RAW = (1ULL << 2), + BTF_TRACE_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */