On Wed, Oct 30, 2024 at 04:37:37PM +0000, Lorenzo Stoakes wrote:
On Mon, Oct 28, 2024 at 04:06:07PM +0000, Lorenzo Stoakes wrote:
I guess I'll try to adapt that and respin a v7 when I get a chance.
Hm looking at this draft patch, it seems like a total rework of pidfd's across the board right (now all pidfd's will need to be converted to pid_fd)? Correct me if I'm wrong.
If only for the signal case, it seems like overkill to define a whole pid_fd and to use this CLASS() wrapper just for this one instance.
If the intent is to convert _all_ pidfd's to use this type, it feels really out of scope for this series and I think we'd probably instead want to go off and do that as a separate series and put this on hold until that is done.
If instead you mean that we ought to do something like this just for the signal case, it feels like it'd be quite a bit of extra abstraction just used in this one case but nowhere else, I think if you did an abstraction like this it would _have_ to be across the board right?
I agree that the issue is with this one signal case that pins only the fd (rather than this pid) where this 'pinning' doesn't _necessary_ mess around with reference counts.
So we definitely must address this, but the issue you had with the first approach was that I think (correct me if I'm wrong) I was passing a pointer to a struct fd which is not permitted right?
Could we pass the struct fd by value to avoid this? I think we'd have to unfortunately special-case this and probably duplicate some code which is a pity as I liked the idea of abstracting everything to one place, but we can obviously do that.
So I guess to TL;DR it, the options are:
Implement pid_fd everywhere, in which case I will leave off on this series and I guess, if I have time I could look at trying to implement that or perhaps you'd prefer to?
We are good for the sake of this series to special-case a pidfd_to_pid() implementation (used only by the pidfd_send_signal() syscall)
Something else, or I am misunderstanding your point :)
Let me know how you want me to proceed on this as we're at v6 already and I want to be _really_ sure I'm doing what you want here.
I don't think we get away with abstracting it in one place without this ending up a pretty janky api. I need to go back and think about calling conventions for all this stuff. For now I think I'm fine with something like the below that abstracts the api to handle mm/ cleanly and then a special-case for pidfd_send_signal():
diff --git a/kernel/pid.c b/kernel/pid.c index 6131543e7c09..c1857c44d1a3 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -564,15 +564,29 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) */ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) { - unsigned int f_flags; + unsigned int f_flags = 0; struct pid *pid; struct task_struct *task; + enum pid_type type;
- pid = pidfd_get_pid(pidfd, &f_flags); - if (IS_ERR(pid)) - return ERR_CAST(pid); + switch (pidfd) { + case PIDFD_SELF_THREAD: + type = PIDTYPE_PID; + pid = get_task_pid(current, type); + break; + case PIDFD_SELF_THREAD_GROUP: + type = PIDTYPE_TGID; + pid = get_task_pid(current, type); + break; + default: + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + type = PIDTYPE_TGID; + break; + }
- task = get_pid_task(pid, PIDTYPE_TGID); + task = get_pid_task(pid, type); put_pid(pid); if (!task) return ERR_PTR(-ESRCH);
That would get you support for PIDFD_SELF_THREAD and PIDFD_SELF_THREAD_GROUP for process_madvise() and process_mrelease().
And for pidfd_send_signal() we could just open code this for now:
diff --git a/kernel/signal.c b/kernel/signal.c index 989b1cc9116a..a2e4e3a5ee42 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3990,6 +3990,45 @@ static struct pid *pidfd_to_pid(const struct file *file) (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ PIDFD_SIGNAL_PROCESS_GROUP)
+static int do_pidfd_send_signal(struct pid *pid, int sig, enum pid_type type, + siginfo_t __user *info, unsigned int flags) +{ + kernel_siginfo_t kinfo; + + switch (flags) { + case PIDFD_SIGNAL_THREAD: + type = PIDTYPE_PID; + break; + case PIDFD_SIGNAL_THREAD_GROUP: + type = PIDTYPE_TGID; + break; + case PIDFD_SIGNAL_PROCESS_GROUP: + type = PIDTYPE_PGID; + break; + } + + if (info) { + int ret = copy_siginfo_from_user_any(&kinfo, info); + if (unlikely(ret)) + return ret; + + if (unlikely(sig != kinfo.si_signo)) + return -EINVAL; + + /* Only allow sending arbitrary signals to yourself. */ + if ((task_pid(current) != pid || type > PIDTYPE_TGID) && + (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) + return -EPERM; + } else { + prepare_kill_siginfo(sig, &kinfo, type); + } + + if (type == PIDTYPE_PGID) + return kill_pgrp_info(sig, &kinfo, pid); + + return kill_pid_info_type(sig, &kinfo, pid, type); +} + /** * sys_pidfd_send_signal - Signal a process through a pidfd * @pidfd: file descriptor of the process @@ -4009,7 +4048,6 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, { int ret; struct pid *pid; - kernel_siginfo_t kinfo; enum pid_type type;
/* Enforce flags be set to 0 until we add an extension. */ @@ -4021,56 +4059,39 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, return -EINVAL;
CLASS(fd, f)(pidfd); - if (fd_empty(f)) - return -EBADF;
- /* Is this a pidfd? */ - pid = pidfd_to_pid(fd_file(f)); - if (IS_ERR(pid)) - return PTR_ERR(pid); + switch (pidfd) { + case PIDFD_SELF_THREAD: + pid = get_task_pid(current, PIDTYPE_PID); + type = PIDTYPE_PID; + break; + case PIDFD_SELF_THREAD_GROUP: + pid = get_task_pid(current, PIDTYPE_TGID); + type = PIDTYPE_TGID; + break; + default: + if (fd_empty(f)) + return -EBADF;
- if (!access_pidfd_pidns(pid)) - return -EINVAL; + /* Is this a pidfd? */ + pid = pidfd_to_pid(fd_file(f)); + if (IS_ERR(pid)) + return PTR_ERR(pid);
- switch (flags) { - case 0: + if (!access_pidfd_pidns(pid)) + return -EINVAL; /* Infer scope from the type of pidfd. */ if (fd_file(f)->f_flags & PIDFD_THREAD) type = PIDTYPE_PID; else type = PIDTYPE_TGID; break; - case PIDFD_SIGNAL_THREAD: - type = PIDTYPE_PID; - break; - case PIDFD_SIGNAL_THREAD_GROUP: - type = PIDTYPE_TGID; - break; - case PIDFD_SIGNAL_PROCESS_GROUP: - type = PIDTYPE_PGID; - break; }
- if (info) { - ret = copy_siginfo_from_user_any(&kinfo, info); - if (unlikely(ret)) - return ret; - - if (unlikely(sig != kinfo.si_signo)) - return -EINVAL; - - /* Only allow sending arbitrary signals to yourself. */ - if ((task_pid(current) != pid || type > PIDTYPE_TGID) && - (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) - return -EPERM; - } else { - prepare_kill_siginfo(sig, &kinfo, type); - } - - if (type == PIDTYPE_PGID) - return kill_pgrp_info(sig, &kinfo, pid); - else - return kill_pid_info_type(sig, &kinfo, pid, type); + ret = do_pidfd_send_signal(pid, sig, type, info, flags); + if (fd_empty(f)) + put_pid(pid); + return ret; }
static int