On Wed, Mar 24, 2021 at 3:12 PM Dmitry Vyukov dvyukov@google.com wrote:
On Wed, 24 Mar 2021 at 15:01, Peter Zijlstra peterz@infradead.org wrote:
One last try, I'll leave it alone now, I promise :-)
This looks like it does what you suggested, thanks! :-)
I'll still need to think about it, because of the potential problem with modify-signal-races and what the user's synchronization story would look like then.
I agree that this looks inherently racy. The attr can't be allocated on stack, user synchronization may be tricky and expensive. The API may provoke bugs and some users may not even realize the race problem.
One potential alternative is use of an opaque u64 context (if we could shove it into the attr). A user can pass a pointer to the attr in there (makes it equivalent to this proposal), or bit-pack size/type (as we want), pass some sequence number or whatever.
Just to clarify what I was thinking about, but did not really state: perf_event_attr_t includes u64 ctx, and we return it back to the user in siginfo_t. Kernel does not treat it in any way. This is a pretty common API pattern in general.
--- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -778,6 +778,9 @@ struct perf_event { void *security; #endif struct list_head sb_list;
unsigned long si_uattr;
unsigned long si_data;
#endif /* CONFIG_PERF_EVENTS */ };
--- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5652,13 +5652,17 @@ static long _perf_ioctl(struct perf_even return perf_event_query_prog_array(event, (void __user *)arg);
case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: {
struct perf_event_attr __user *uattr; struct perf_event_attr new_attr;
int err = perf_copy_attr((struct perf_event_attr __user *)arg,
&new_attr);
int err;
uattr = (struct perf_event_attr __user *)arg;
err = perf_copy_attr(uattr, &new_attr); if (err) return err;
event->si_uattr = (unsigned long)uattr;
return perf_event_modify_attr(event, &new_attr); } default:
@@ -6399,7 +6403,12 @@ static void perf_sigtrap(struct perf_eve clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_code = TRAP_PERF;
info.si_errno = event->attr.type;
info.si_addr = (void *)event->si_data;
info.si_perf = event->si_uattr;
if (event->parent)
info.si_perf = event->parent->si_uattr;
force_sig_info(&info);
}
@@ -6414,8 +6423,8 @@ static void perf_pending_event_disable(s WRITE_ONCE(event->pending_disable, -1);
if (event->attr.sigtrap) {
atomic_set(&event->event_limit, 1); /* rearm event */ perf_sigtrap(event);
atomic_set_release(&event->event_limit, 1); /* rearm event */ return; }
@@ -9121,6 +9130,7 @@ static int __perf_event_overflow(struct if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP;
event->si_data = data->addr; perf_event_disable_inatomic(event); }
@@ -12011,6 +12021,8 @@ SYSCALL_DEFINE5(perf_event_open, goto err_task; }
event->si_uattr = (unsigned long)attr_uptr;
if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP;