[ Upstream commit 283ca526a9bd75aed7350220d7b1f8027d99c3fd ]
When tracing and networking programs are both attached in the system and both use event-output helpers that eventually call into perf_event_output(), we can end up in a situation where the attached tracing program runs in user context while a cls_bpf program is triggered on that same CPU out of softirq context.
Since both rely on the same per-cpu perf_sample_data, we could potentially corrupt it. This can only ever happen with a combination of the two program types: all tracing programs use a bpf_prog_active counter to bail out if a program is already running on that CPU out of a different context, and XDP and cls_bpf programs on their own don't have this issue since they only ever run in a single context. Therefore, give each of the two paths its own per-cpu perf_sample_data so that one can never be accessed from the other.
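To make the interleaving concrete, here is a minimal userspace C model of the race (a sketch, not kernel code: the function names are made up, and the direct call from the tracing path into the networking path stands in for the softirq firing in between):

#include <stdio.h>

/* Stand-in for struct perf_sample_data. */
struct sample_data {
	int raw;
};

/* Old scheme: one scratch buffer shared by both program types. */
static struct sample_data shared_sd;

/* New scheme: one scratch buffer per context, as in this patch. */
static struct sample_data trace_sd;	/* models bpf_trace_sd */
static struct sample_data misc_sd;	/* models bpf_misc_sd  */

static void emit(const char *who, const struct sample_data *sd)
{
	printf("%s emitted raw=%d\n", who, sd->raw);
}

/* cls_bpf path; in the kernel this runs out of softirq context. */
static void net_event_output(struct sample_data *sd)
{
	sd->raw = 2;
	emit("cls_bpf", sd);
}

/* Tracing path; runs in user context and can be interrupted. */
static void trace_event_output(struct sample_data *sd,
			       struct sample_data *net_sd)
{
	sd->raw = 1;			/* models perf_sample_data_init() */
	net_event_output(net_sd);	/* "softirq" fires right here */
	emit("tracing", sd);		/* corrupted iff sd == net_sd */
}

int main(void)
{
	/* Before the fix: both share shared_sd, tracing emits raw=2. */
	trace_event_output(&shared_sd, &shared_sd);

	/* After the fix: disjoint buffers, tracing emits raw=1. */
	trace_event_output(&trace_sd, &misc_sd);
	return 0;
}

Splitting the buffers is sufficient here because on a given CPU a softirq can interrupt task context but not the other way around, and recursion within the tracing side is already fenced off by the bpf_prog_active counter mentioned above.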
Fixes: 20b9d7ac4852 ("bpf: avoid excessive stack usage for perf_sample_data")
Reported-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Song Liu <songliubraving@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b6..6350f64 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -293,14 +293,13 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
 
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
-			u64 flags, struct perf_raw_record *raw)
+			u64 flags, struct perf_sample_data *sd)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
@@ -323,8 +322,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_sample_data_init(sd, 0, 0);
-	sd->raw = raw;
 	perf_event_output(event, sd, regs);
 	return 0;
 }
@@ -332,6 +329,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -342,7 +340,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
+
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -357,10 +358,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
 	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
 	struct perf_raw_frag frag = {
 		.copy		= ctx_copy,
@@ -378,8 +381,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 	};
 
 	perf_fetch_caller_regs(regs);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 BPF_CALL_0(bpf_get_current_task)