Re: [PATCH RFC v2 2/4] procfs: add "pidns" mount option

25 Jul 2025

On 2025-07-24, Christian Brauner brauner@kernel.org wrote:
...
On Wed, Jul 23, 2025 at 09:18:52AM +1000, Aleksa Sarai wrote:
...
Since the introduction of pid namespaces, their interaction with procfs
has been entirely implicit in ways that require a lot of dancing around
by programs that need to construct sandboxes with different PID
namespaces.
Being able to explicitly specify the pid namespace to use when
constructing a procfs super block will allow programs to no longer need
to fork off a process which does then does unshare(2) / setns(2) and
forks again in order to construct a procfs in a pidns.
So, provide a "pidns" mount option which allows such users to just
explicitly state which pid namespace they want that procfs instance to
use. This interface can be used with fsconfig(2) either with a file
descriptor or a path:
fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd);
  fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0);
Fwiw, namespace mount options could just be VFS generic mount options.
But it's not something that we need to solve right now.
Yeah if we add this to sysfs it probably should be made generic, but
let's punt this to later. :D
...
...
or with classic mount(2) / mount(8):
// mount -t proc -o pidns=/proc/self/ns/pid proc /tmp/proc
  mount("proc", "/tmp/proc", "proc", MS_..., "pidns=/proc/self/ns/pid");
As this new API is effectively shorthand for setns(2) followed by
mount(2), the permission model for this mirrors pidns_install() to avoid
opening up new attack surfaces by loosening the existing permission
model.
Note that the mount infrastructure also allows userspace to reconfigure
the pidns of an existing procfs mount, which may or may not be useful to
some users.
Signed-off-by: Aleksa Sarai cyphar@cyphar.com
Documentation/filesystems/proc.rst |  6 +++
 fs/proc/root.c                     | 90 +++++++++++++++++++++++++++++++++++---
 2 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 5236cb52e357..c520b9f8a3fd 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -2360,6 +2360,7 @@ The following mount options are supported:
   hidepid=	Set /proc/<pid>/ access mode.
   gid=		Set the group authorized to learn processes information.
   subset=		Show only the specified subset of procfs.

pidns=		Specify a the namespace used by this procfs.
=========	========================================================

hidepid=off or hidepid=0 means classic mode - everybody may access all
@@ -2392,6 +2393,11 @@ information about processes information, just add identd to this group.
 subset=pid hides all top level files and directories in the procfs that
 are not related to tasks.
 
+pidns= specifies a pid namespace (either as a string path to something like
+`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) that
+will be used by the procfs instance when translating pids. By default, procfs
+will use the calling process's active pid namespace.



Chapter 5: Filesystem behavior
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ed86ac710384..057c8a125c6e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -38,12 +38,18 @@ enum proc_param {
   Opt_gid,
   Opt_hidepid,
   Opt_subset,
+#ifdef CONFIG_PID_NS

Opt_pidns,

+#endif
 };
 
 static const struct fs_parameter_spec proc_fs_parameters[] = {

fsparam_u32("gid",	Opt_gid),


fsparam_u32("gid",		Opt_gid),
fsparam_string("hidepid",	Opt_hidepid),
fsparam_string("subset",	Opt_subset),

+#ifdef CONFIG_PID_NS

fsparam_file_or_string("pidns",	Opt_pidns),

+#endif
   {}
 };
 
@@ -109,11 +115,67 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
   return 0;
 }
 
+#ifdef CONFIG_PID_NS
+static int proc_parse_pidns_param(struct fs_context *fc,

		  struct fs_parameter *param,


		  struct fs_parse_result *result)



+{

struct proc_fs_context *ctx = fc->fs_private;
struct pid_namespace *target, *active = task_active_pid_ns(current);
struct ns_common *ns;
struct file *ns_filp __free(fput) = NULL;

switch (param->type) {
case fs_value_is_file:
/* came throug fsconfig, steal the file reference */


ns_filp = param->file;


param->file = NULL;



This can be shortened to:
ns_filp = no_free_ptr(param->file);
I really need to take a closer look at <linux/cleanup.h>, each time I
look at it I learn about another handy helper.
...
...

break;


case fs_value_is_string:
ns_filp = filp_open(param->string, O_RDONLY, 0);


break;


default:
WARN_ON_ONCE(true);


break;


}
if (!ns_filp)
ns_filp = ERR_PTR(-EBADF);


if (IS_ERR(ns_filp)) {
errorfc(fc, "could not get file from pidns argument");


return PTR_ERR(ns_filp);


}

if (!proc_ns_file(ns_filp))
return invalfc(fc, "pidns argument is not an nsfs file");


ns = get_proc_ns(file_inode(ns_filp));
if (ns->ops->type != CLONE_NEWPID)
return invalfc(fc, "pidns argument is not a pidns file");


target = container_of(ns, struct pid_namespace, ns);

/*
* pidns= is shorthand for joining the pidns to get a fsopen fd, so the


* permission model should be the same as pidns_install().


*/


if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) {
errorfc(fc, "insufficient permissions to set pidns");


return -EPERM;


}
if (!pidns_is_ancestor(target, active))
return invalfc(fc, "cannot set pidns to non-descendant pidns");



This made me think. If one rewrote this as:
if (!ns_capable(task_active_pidns(current)->user_ns, CAP_SYS_ADMIN))
if (!pidns_is_ancestor(target, active))
that would also work, right? IOW, you'd be checking whether you're
capable over your current pid namespace owning userns and if the target
pidns is an ancestor it's also implied by the first check that you're
capable over it.
The only way this would not be true is if a descendant pidns would be
owned by a userns over which you don't hold privileges and I wondered
whether that's even possible? I don't think it is but maybe you see a
way.
Well, if you run a setuid binary, it could create a pidns that is a
child but is owned by a more privileged userns than you. My main goal
here was to just mirror pidns_install() exactly, to make sure that the
permission model was identical.
...
...


put_pid_ns(ctx->pid_ns);
ctx->pid_ns = get_pid_ns(target);
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
return 0;

+}
+#endif /* CONFIG_PID_NS */



static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
   struct proc_fs_context *ctx = fc->fs_private;
   struct fs_parse_result result;

int opt;


int opt, err;

opt = fs_parse(fc, proc_fs_parameters, param, &result);
   if (opt < 0)
@@ -125,14 +187,24 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
   	break;
 
   case Opt_hidepid:

if (proc_parse_hidepid_param(fc, param))


	return -EINVAL;




err = proc_parse_hidepid_param(fc, param);


if (err)


	return err;

break;

case Opt_subset:

if (proc_parse_subset_param(fc, param->string) < 0)


	return -EINVAL;




err = proc_parse_subset_param(fc, param->string);


if (err)


	return err;


break;




+#ifdef CONFIG_PID_NS

case Opt_pidns:

I think it would be easier if we returned EOPNOTSUPP when !CONFIG_PID_NS
instead of EINVALing this?
...

err = proc_parse_pidns_param(fc, param, &result);


if (err)


	return err;

break;

+#endif
 
   default:
   	return -EINVAL;
@@ -154,6 +226,12 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
   	fs_info->hide_pid = ctx->hidepid;
   if (ctx->mask & (1 << Opt_subset))
   	fs_info->pidonly = ctx->pidonly;
+#ifdef CONFIG_PID_NS

if (ctx->mask & (1 << Opt_pidns)) {
put_pid_ns(fs_info->pid_ns);


fs_info->pid_ns = get_pid_ns(ctx->pid_ns);


}

+#endif
 }
 
 static int proc_fill_super(struct super_block *s, struct fs_context *fc)
-- 
2.50.0
-- 
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
https://www.cyphar.com/

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH RFC v2 2/4] procfs: add "pidns" mount option

Signed-off-by: Aleksa Sarai cyphar@cyphar.com

Chapter 5: Filesystem behavior