From: Tonghao Zhang tong@infragraf.org
[ Upstream commit 377c16fa3f3c60d21e4b05314c8be034ce37f2eb ]
The number of online cpu may be not equal to possible cpu. "bpftool prog profile" can not create pmu event on possible but on online cpu.
$ dmidecode -s system-product-name PowerEdge R620 $ cat /sys/devices/system/cpu/possible 0-47 $ cat /sys/devices/system/cpu/online 0-31
Disable cpu dynamically: $ echo 0 > /sys/devices/system/cpu/cpuX/online
If one cpu is offline, perf_event_open will return ENODEV. To fix this issue: * check value returned and skip offline cpu. * close pmu_fd immediately on error path, avoid fd leaking.
Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Signed-off-by: Tonghao Zhang tong@infragraf.org Cc: Quentin Monnet quentin@isovalent.com Cc: Alexei Starovoitov ast@kernel.org Cc: Daniel Borkmann daniel@iogearbox.net Cc: Andrii Nakryiko andrii@kernel.org Cc: Martin KaFai Lau martin.lau@linux.dev Cc: Song Liu song@kernel.org Cc: Yonghong Song yhs@fb.com Cc: John Fastabend john.fastabend@gmail.com Cc: KP Singh kpsingh@kernel.org Cc: Stanislav Fomichev sdf@google.com Cc: Hao Luo haoluo@google.com Cc: Jiri Olsa jolsa@kernel.org Acked-by: John Fastabend john.fastabend@gmail.com Link: https://lore.kernel.org/r/20230202131701.29519-1-tong@infragraf.org Signed-off-by: Martin KaFai Lau martin.lau@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/bpf/bpftool/prog.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cfc9fdc1e8634..e87738dbffc10 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2233,10 +2233,38 @@ static void profile_close_perf_events(struct profiler_bpf *obj) profile_perf_event_cnt = 0; }
+static int profile_open_perf_event(int mid, int cpu, int map_fd) +{ + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr, + -1 /*pid*/, cpu, -1 /*group_fd*/, 0); + if (pmu_fd < 0) { + if (errno == ENODEV) { + p_info("cpu %d may be offline, skip %s profiling.", + cpu, metrics[mid].name); + profile_perf_event_cnt++; + return 0; + } + return -1; + } + + if (bpf_map_update_elem(map_fd, + &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + close(pmu_fd); + return -1; + } + + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + return 0; +} + static int profile_open_perf_events(struct profiler_bpf *obj) { unsigned int cpu, m; - int map_fd, pmu_fd; + int map_fd;
profile_perf_events = calloc( sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); @@ -2255,17 +2283,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj) if (!metrics[m].selected) continue; for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { - pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, - -1/*pid*/, cpu, -1/*group_fd*/, 0); - if (pmu_fd < 0 || - bpf_map_update_elem(map_fd, &profile_perf_event_cnt, - &pmu_fd, BPF_ANY) || - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + if (profile_open_perf_event(m, cpu, map_fd)) { p_err("failed to create event %s on cpu %d", metrics[m].name, cpu); return -1; } - profile_perf_events[profile_perf_event_cnt++] = pmu_fd; } } return 0;