--- James Clark (2): perf cs-etm: Fix decoding for sparse CPU maps perf cs-etm: Test sparse CPU maps
tools/perf/tests/shell/test_arm_coresight.sh | 54 ++++++++++++++++++++++++++++ tools/perf/util/cs-etm.c | 3 +- 2 files changed, 56 insertions(+), 1 deletion(-) --- base-commit: 571d29baa07e83e637075239f379f91353c24ec9 change-id: 20260115-james-perf-coresight-cpu-map-segfault-e250af5aa778
Best regards,
The ETM decoder incorrectly assumed that auxtrace queue indices were equivalent to CPU numbers. This assumption is used both for inserting records into the queues and for fetching a queue when given a CPU number. The assumption held when perf always opened a dummy event on every CPU, even if the user provided a subset of CPUs on the command line, which resulted in the indices aligning with the CPU numbers.
For example:
# event : name = cs_etm//u, , id = { 2451, 2452 }, type = 11 (cs_etm), size = 136, config = 0x4010, { sample_period, samp> # event : name = dummy:u, , id = { 2453, 2454, 2455, 2456 }, type = 1 (PERF_TYPE_SOFTWARE), size = 136, config = 0x9 (PER>
0 0 0x200 [0xd0]: PERF_RECORD_ID_INDEX nr: 6 ... id: 2451 idx: 2 cpu: 2 tid: -1 ... id: 2452 idx: 3 cpu: 3 tid: -1 ... id: 2453 idx: 0 cpu: 0 tid: -1 ... id: 2454 idx: 1 cpu: 1 tid: -1 ... id: 2455 idx: 2 cpu: 2 tid: -1 ... id: 2456 idx: 3 cpu: 3 tid: -1
Since commit 811082e4b668 ("perf parse-events: Support user CPUs mixed with threads/processes") the dummy event no longer behaves in this way, making the ETM event indices start from 0 on the first CPU recorded regardless of its ID:
# event : name = cs_etm//u, , id = { 771, 772 }, type = 11 (cs_etm), size = 144, config = 0x4010, { sample_period, sample> # event : name = dummy:u, , id = { 773, 774 }, type = 1 (PERF_TYPE_SOFTWARE), size = 144, config = 0x9 (PERF_COUNT_SW_DUM>
0 0 0x200 [0x90]: PERF_RECORD_ID_INDEX nr: 4 ... id: 771 idx: 0 cpu: 2 tid: -1 ... id: 772 idx: 1 cpu: 3 tid: -1 ... id: 773 idx: 0 cpu: 2 tid: -1 ... id: 774 idx: 1 cpu: 3 tid: -1
This causes the following segfault when decoding:
$ perf record -e cs_etm//u -C 2,3 -- true $ perf report
perf: Segmentation fault -------- backtrace -------- #0 0xaaaabf9fd020 in ui__signal_backtrace setup.c:110 #1 0xffffab5c7930 in __kernel_rt_sigreturn [vdso][930] #2 0xaaaabfb68d30 in cs_etm_decoder__reset cs-etm-decoder.c:85 #3 0xaaaabfb65930 in cs_etm__get_data_block cs-etm.c:2032 #4 0xaaaabfb666fc in cs_etm__run_per_cpu_timeless_decoder cs-etm.c:2551 #5 0xaaaabfb6692c in (cs_etm__process_timeless_queues cs-etm.c:2612 #6 0xaaaabfb63390 in cs_etm__flush_events cs-etm.c:921 #7 0xaaaabfb324c0 in auxtrace__flush_events auxtrace.c:2915 #8 0xaaaabfaac378 in __perf_session__process_events session.c:2285 #9 0xaaaabfaacc9c in perf_session__process_events session.c:2442 #10 0xaaaabf8d3d90 in __cmd_report builtin-report.c:1085 #11 0xaaaabf8d6944 in cmd_report builtin-report.c:1866 #12 0xaaaabf95ebfc in run_builtin perf.c:351 #13 0xaaaabf95eeb0 in handle_internal_command perf.c:404 #14 0xaaaabf95f068 in run_argv perf.c:451 #15 0xaaaabf95f390 in main perf.c:558 #16 0xffffaab97400 in __libc_start_call_main libc_start_call_main.h:74 #17 0xffffaab974d8 in __libc_start_main@@GLIBC_2.34 libc-start.c:128 #18 0xaaaabf8aa8f0 in _start perf[7a8f0]
Fix it by inserting into the queues based on CPU number, rather than using the index.
Fixes: 811082e4b668 ("perf parse-events: Support user CPUs mixed with threads/processes") Signed-off-by: James Clark <james.clark@linaro.org> --- tools/perf/util/cs-etm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 25d56e0f1c07..12b55c2bc2ca 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3086,7 +3086,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (aux_offset >= auxtrace_event->offset && aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { - struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv; + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu);
/* * If this AUX event was inside this buffer somewhere, create a new auxtrace event @@ -3095,6 +3095,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o auxtrace_fragment.auxtrace = *auxtrace_event; auxtrace_fragment.auxtrace.size = aux_size; auxtrace_fragment.auxtrace.offset = aux_offset; + auxtrace_fragment.auxtrace.idx = etmq->queue_nr; file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
We currently only test with the default (all CPUs) or --per-thread mode. Different permutations of the "-C" option can affect decoding, so add tests for some of them.
Signed-off-by: James Clark <james.clark@linaro.org> --- tools/perf/tests/shell/test_arm_coresight.sh | 54 ++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+)
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 1c750b67d141..bbf89e944e7b 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -198,6 +198,58 @@ arm_cs_etm_basic_test() { arm_cs_report "CoreSight basic testing with '$*'" $err }
+arm_cs_etm_test_cpu_list() { + echo "Testing sparse CPU list: $1" + perf record -o ${perfdata} -e cs_etm//u -C $1 \ + -- taskset --cpu-list $1 true > /dev/null 2>&1 + perf_script_branch_samples true + err=$? + arm_cs_report "CoreSight sparse CPUs with '$*'" $err +} + +arm_cs_etm_sparse_cpus_test() { + # Iterate for every ETM device + cpus=() + for dev in /sys/bus/event_source/devices/cs_etm/cpu*; do + # Canonicalize the path + dev=`readlink -f $dev` + + # Find the ETM device belonging to which CPU + cpus+=("$(cat $dev/cpu)") + done + + mapfile -t cpus < <(printf '%s\n' "${cpus[@]}" | sort -n) + total=${#cpus[@]} + + # Need more than 1 to test + if [ $total -le 1 ]; then + return 0 + fi + + half=$((total / 2)) + + # First half + first_half=$(IFS=,; echo "${cpus[*]:0:$half}") + arm_cs_etm_test_cpu_list $first_half + + # Second half + second_half=$(IFS=,; echo "${cpus[*]:$half}") + arm_cs_etm_test_cpu_list $second_half + + # Odd list is the same as halves unless >= 4 CPUs + if [ $total -lt 4 ]; then + return 0 + fi + + # Odd indices + odd_cpus=() + for ((i=1; i<total; i+=2)); do + odd_cpus+=("${cpus[$i]}") + done + odd_list=$(IFS=,; echo "${odd_cpus[*]}") + arm_cs_etm_test_cpu_list $odd_list +} + arm_cs_etm_traverse_path_test arm_cs_etm_system_wide_test arm_cs_etm_snapshot_test @@ -211,4 +263,6 @@ arm_cs_etm_basic_test -e cs_etm/timestamp=1/ -a arm_cs_etm_basic_test -e cs_etm/timestamp=0/ arm_cs_etm_basic_test -e cs_etm/timestamp=1/
+arm_cs_etm_sparse_cpus_test + exit $glb_err