Add the perf_event_attr::remove_on_exec bit to support removing an event from a task on exec.
This option supports the case where an event is meant to be process-wide only and should not propagate beyond exec, limiting monitoring to the original process image.
Signed-off-by: Marco Elver <elver@google.com> --- v2: * Add patch to series. --- include/uapi/linux/perf_event.h | 3 ++- kernel/events/core.c | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 813efb65fea8..8c5b9f5ad63f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -390,7 +390,8 @@ struct perf_event_attr { text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ - __reserved_1 : 28; + remove_on_exec : 1, /* event is removed from task on exec */ + __reserved_1 : 27;
union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/kernel/events/core.c b/kernel/events/core.c index a8382e6c907c..bc9e6e35e414 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4195,6 +4195,46 @@ static void perf_event_enable_on_exec(int ctxn) put_ctx(clone_ctx); }
+static void perf_remove_from_owner(struct perf_event *event); +static void perf_event_exit_event(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child); + +/* + * Removes all events from the current task that have been marked + * remove-on-exec, and feeds their values back to parent events. + */ +static void perf_event_remove_on_exec(void) +{ + int ctxn; + + for_each_task_context_nr(ctxn) { + struct perf_event_context *ctx; + struct perf_event *event, *next; + + ctx = perf_pin_task_context(current, ctxn); + if (!ctx) + continue; + mutex_lock(&ctx->mutex); + + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { + if (!event->attr.remove_on_exec) + continue; + + if (!is_kernel_event(event)) + perf_remove_from_owner(event); + perf_remove_from_context(event, DETACH_GROUP); + /* + * Remove the event and feed back its values to the + * parent event. + */ + perf_event_exit_event(event, ctx, current); + } + mutex_unlock(&ctx->mutex); + put_ctx(ctx); + } +} + struct perf_read_data { struct perf_event *event; bool group; @@ -7519,6 +7559,8 @@ void perf_event_exec(void) true); } rcu_read_unlock(); + + perf_event_remove_on_exec(); }
struct remote_output { @@ -11600,6 +11642,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (!attr->inherit && attr->inherit_thread) return -EINVAL;
+ if (attr->remove_on_exec && attr->enable_on_exec) + return -EINVAL; + out: return ret;