On Sat, May 31, 2025 at 12:20 AM Blake Jones blakejones@google.com wrote:
The BTF dumper code currently displays arrays of characters as just that - arrays, with each character formatted individually. Sometimes this is what makes sense, but it's nice to be able to treat that array as a string.
This change adds a special case to the btf_dump functionality to allow arrays of single-byte integer values to be printed as character strings. Characters for which isprint() returns false are printed as hex-escaped values. This is enabled when the new ".print_strings" is set to 1 in the btf_dump_type_data_opts structure.
As an example, here's what it looks like to dump the string "hello" using a few different field values for btf_dump_type_data_opts (.compact = 1):
- .print_strings = 0, .skip_names = 0: (char[6])['h','e','l','l','o',]
- .print_strings = 0, .skip_names = 1: ['h','e','l','l','o',]
- .print_strings = 1, .skip_names = 0: (char[6])"hello"
- .print_strings = 1, .skip_names = 1: "hello"
Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
- .print_strings = 0: ['h',-1,]
- .print_strings = 1: "h\xff"
Signed-off-by: Blake Jones blakejones@google.com
tools/lib/bpf/btf.h | 3 +- tools/lib/bpf/btf_dump.c | 51 ++++++++- .../selftests/bpf/prog_tests/btf_dump.c | 102 ++++++++++++++++++ 3 files changed, 154 insertions(+), 2 deletions(-)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4392451d634b..be8e8e26d245 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */
bool print_strings; /* print char arrays as strings */
let's use "emit_strings" naming, so it's consistent with emit_zeroes?
size_t :0;
}; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field print_strings
LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..a07dd5accdd8 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -75,6 +75,7 @@ struct btf_dump_data { bool is_array_member; bool is_array_terminated; bool is_array_char;
bool print_strings;
ditto, emit_strings (and maybe put it next to emit_zeroes then)
};
struct btf_dump { @@ -2028,6 +2029,50 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); }
+static int btf_dump_string_data(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
const void *data)
+{
const struct btf_array *array = btf_array(t);
__u32 i;
if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
btf__resolve_size(d->btf, array->type) != 1 ||
!d->typed_dump->print_strings) {
pr_warn("unexpected %s() call for array type %u\n",
__func__, array->type);
return -EINVAL;
}
IMO, a bit too defensive. You literally checked that we have char[] in the caller, I think it's fine not to double-check that here, let's drop this
btf_dump_data_pfx(d);
btf_dump_printf(d, "\"");
for (i = 0; i < array->nelems; i++, data++) {
char c;
if (data >= d->typed_dump->data_end)
return -E2BIG;
c = *(char *)data;
if (c == '\0') {
/* When printing character arrays as strings, NUL bytes
* are always treated as string terminators; they are
* never printed.
*/
break;
what if there are non-zero characters after the terminating zero? should we keep going and if there is any non-zero one, still emit them? or maybe that should be an extra option?... When capturing some data and dumping, it might be important to know all the contents (it might be garbage or not, but you'll still see non-garbage values before \0, so maybe it's fine to always do it?)
}
if (isprint(c))
btf_dump_printf(d, "%c", c);
else
btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
}
btf_dump_printf(d, "\"");
return 0;
+}
[...]
+/*
- String-like types are generally not named, so they need to be
- found this way rather than via btf__find_by_name().
- */
+static int find_char_array_type(struct btf *btf, int nelems) +{
const int nr_types = btf__type_cnt(btf);
const int char_type = btf__find_by_name(btf, "char");
for (int i = 1; i < nr_types; i++) {
const struct btf_type *t;
const struct btf_array *at;
t = btf__type_by_id(btf, i);
if (btf_kind(t) != BTF_KIND_ARRAY)
btf_is_array()
continue;
at = btf_array(t);
if (at->nelems == nelems && at->type == char_type)
return i;
}
return -ENOENT;
+}
+static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
char *str, struct btf_dump_type_data_opts *opts,
char *ptr, size_t ptr_sz,
const char *expected_val)
+{
char name[64];
size_t type_sz;
int type_id;
int ret = 0;
snprintf(name, sizeof(name), "char[%zu]", ptr_sz);
type_id = find_char_array_type(btf, ptr_sz);
instead of trying to find a suitable type in kernel BTF, just generate a tiny custom BTF with necessary char[N] types? see btf__add_xxx() usage for an example.
if (!ASSERT_GE(type_id, 0, "find type id"))
return -ENOENT;
type_sz = btf__resolve_size(btf, type_id);
str[0] = '\0';
ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
if (type_sz <= ptr_sz) {
if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
return -EINVAL;
} else {
if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
return -EINVAL;
}
if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
return -EFAULT;
return 0;
+}
[...]