From: Adrian Hunter adrian.hunter@intel.com
commit 6cc447964555df209c590756bd804d3bb9ce1fe0 upstream.
Originally, (refer commit f90d194a867a5a1d ("perf evlist: Do not poll events that use the system_wide flag") there wasn't much reason to poll system-wide events because:
1. The mmaps get "merged" via set-output anyway (the per-cpu case) 2. perf reads all mmaps when any event is woken 3. system-wide mmaps do not fill up as fast as the mmaps for user selected events
But there was 1 reason not to poll which was that it prevented correct termination due to POLLHUP on all user selected events. That issue is now easily resolved by using fdarray_flag__nonfilterable.
With the advent of commit ae4f8ae16a078964 ("libperf evlist: Allow mixing per-thread and per-cpu mmaps"), system-wide mmaps can be used also in the per-thread case where reason 1 does not apply.
Fix the omission of system-wide events from polling by using the fdarray_flag__nonfilterable flag.
Example:
Before:
$ perf record --no-bpf-event -vvv -e intel_pt// --per-thread uname 2>err.txt Linux $ grep 'sys_perf_event_open.*=|pollfd' err.txt sys_perf_event_open: pid 155076 cpu -1 group_fd -1 flags 0x8 = 5 sys_perf_event_open: pid 155076 cpu -1 group_fd -1 flags 0x8 = 6 sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 7 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 9 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 = 10 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 = 11 sys_perf_event_open: pid -1 cpu 4 group_fd -1 flags 0x8 = 12 sys_perf_event_open: pid -1 cpu 5 group_fd -1 flags 0x8 = 13 sys_perf_event_open: pid -1 cpu 6 group_fd -1 flags 0x8 = 14 sys_perf_event_open: pid -1 cpu 7 group_fd -1 flags 0x8 = 15 thread_data[0x55fb43c29e80]: pollfd[0] <- event_fd=5 thread_data[0x55fb43c29e80]: pollfd[1] <- event_fd=6 thread_data[0x55fb43c29e80]: pollfd[2] <- non_perf_event fd=4
After:
$ perf record --no-bpf-event -vvv -e intel_pt// --per-thread uname 2>err.txt Linux $ grep 'sys_perf_event_open.*=|pollfd' err.txt sys_perf_event_open: pid 156316 cpu -1 group_fd -1 flags 0x8 = 5 sys_perf_event_open: pid 156316 cpu -1 group_fd -1 flags 0x8 = 6 sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 7 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 9 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 = 10 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 = 11 sys_perf_event_open: pid -1 cpu 4 group_fd -1 flags 0x8 = 12 sys_perf_event_open: pid -1 cpu 5 group_fd -1 flags 0x8 = 13 sys_perf_event_open: pid -1 cpu 6 group_fd -1 flags 0x8 = 14 sys_perf_event_open: pid -1 cpu 7 group_fd -1 flags 0x8 = 15 thread_data[0x55cc19e58e80]: pollfd[0] <- event_fd=5 thread_data[0x55cc19e58e80]: pollfd[1] <- event_fd=6 thread_data[0x55cc19e58e80]: pollfd[2] <- event_fd=7 thread_data[0x55cc19e58e80]: pollfd[3] <- event_fd=9 thread_data[0x55cc19e58e80]: pollfd[4] <- event_fd=10 thread_data[0x55cc19e58e80]: pollfd[5] <- event_fd=11 thread_data[0x55cc19e58e80]: pollfd[6] <- event_fd=12 thread_data[0x55cc19e58e80]: pollfd[7] <- event_fd=13 thread_data[0x55cc19e58e80]: pollfd[8] <- event_fd=14 thread_data[0x55cc19e58e80]: pollfd[9] <- event_fd=15 thread_data[0x55cc19e58e80]: pollfd[10] <- non_perf_event fd=4
Fixes: ae4f8ae16a078964 ("libperf evlist: Allow mixing per-thread and per-cpu mmaps") Signed-off-by: Adrian Hunter adrian.hunter@intel.com Acked-by: Namhyung Kim namhyung@kernel.org Cc: Ian Rogers irogers@google.com Cc: Jiri Olsa jolsa@kernel.org Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220915122612.81738-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- tools/lib/perf/evlist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 6b1bafe267a4..8ec5b9f344e0 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -441,6 +441,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
perf_evlist__for_each_entry(evlist, evsel) { bool overwrite = evsel->attr.write_backward; + enum fdarray_flags flgs; struct perf_mmap *map; int *output, fd, cpu;
@@ -504,8 +505,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
- if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { + flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default; + if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) { perf_mmap__put(map); return -1; }