[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260204-bergung-abhilfe-073d732bc51f@brauner>
Date: Wed, 4 Feb 2026 15:39:53 +0100
From: Christian Brauner <brauner@...nel.org>
To: Alexey Gladkov <legion@...nel.org>
Cc: Dan Klishch <danilklishch@...il.com>,
Al Viro <viro@...iv.linux.org.uk>, "Eric W . Biederman" <ebiederm@...ssion.com>,
Kees Cook <keescook@...omium.org>, containers@...ts.linux-foundation.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 2/5] proc: subset=pid: Show /proc/self/net only for
CAP_NET_ADMIN
On Tue, Jan 13, 2026 at 10:20:34AM +0100, Alexey Gladkov wrote:
> Cache the mounters credentials and allow access to the net directories
> contingent of the permissions of the mounter of proc.
>
> Do not show /proc/self/net when proc is mounted with subset=pid option
> and the mounter does not have CAP_NET_ADMIN.
>
> Signed-off-by: Alexey Gladkov <legion@...nel.org>
> ---
> fs/proc/proc_net.c | 8 ++++++++
> fs/proc/root.c | 5 +++++
> include/linux/proc_fs.h | 1 +
> 3 files changed, 14 insertions(+)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 52f0b75cbce2..6e0ccef0169f 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -23,6 +23,7 @@
> #include <linux/uidgid.h>
> #include <net/net_namespace.h>
> #include <linux/seq_file.h>
> +#include <linux/security.h>
>
> #include "internal.h"
>
> @@ -270,6 +271,7 @@ static struct net *get_proc_task_net(struct inode *dir)
> struct task_struct *task;
> struct nsproxy *ns;
> struct net *net = NULL;
> + struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);
>
> rcu_read_lock();
> task = pid_task(proc_pid(dir), PIDTYPE_PID);
> @@ -282,6 +284,12 @@ static struct net *get_proc_task_net(struct inode *dir)
> }
> rcu_read_unlock();
>
> + if (net && (fs_info->pidonly == PROC_PIDONLY_ON) &&
> + security_capable(fs_info->mounter_cred, net->user_ns, CAP_NET_ADMIN, CAP_OPT_NONE) < 0) {
> + put_net(net);
> + net = NULL;
> + }
> +
> return net;
> }
>
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index d8ca41d823e4..ed8a101d09d3 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -254,6 +254,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> return -ENOMEM;
>
> fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
> + fs_info->mounter_cred = get_cred(fc->cred);
> proc_apply_options(fs_info, fc, current_user_ns());
>
> /* User space would break if executables or devices appear on proc */
> @@ -303,6 +304,9 @@ static int proc_reconfigure(struct fs_context *fc)
>
> sync_filesystem(sb);
>
> + put_cred(fs_info->mounter_cred);
> + fs_info->mounter_cred = get_cred(fc->cred);
Afaict, this races with get_proc_task_net(). You need a synchronization
mechanism here so that get_proc_task_net() doesn't risk accessing
invalid mounter creds while someone concurrently updates the creds.
Proposal how to fix that below.
But I'm kinda torn here anyway whether we want that credential change on
remount. The problem is that someone might inadvertently allow access to
/proc/<pid>/net as a side-effect simply because they remounted procfs.
But they never had a chance to prevent this.
I think it's best if mounter_creds stays fixed just as they do for
overlayfs. So we don't allow them to change on reconfigure. That also
makes all of the code I hinted at below pointless.
If we ever want to change the credentials it's easier to add a mount
option to procfs like I did for overlayfs.
_Untested_ patches:
First, the preparatory patch diff (no functional changes intended):
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 52f0b75cbce2..81825e5819b8 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -268,19 +268,19 @@ EXPORT_SYMBOL_GPL(proc_create_net_single_write);
static struct net *get_proc_task_net(struct inode *dir)
{
struct task_struct *task;
- struct nsproxy *ns;
- struct net *net = NULL;
+ struct net *net;
- rcu_read_lock();
+ guard(rcu)();
task = pid_task(proc_pid(dir), PIDTYPE_PID);
- if (task != NULL) {
- task_lock(task);
- ns = task->nsproxy;
- if (ns != NULL)
- net = get_net(ns->net_ns);
- task_unlock(task);
+ if (!task)
+ return NULL;
+
+ scoped_guard(task_lock, task) {
+ struct nsproxy *ns = task->nsproxy;
+ if (!ns)
+ return NULL;
+ net = get_net(ns->net_ns);
}
- rcu_read_unlock();
return net;
}
And then on top of it something like:
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 81825e5819b8..47dc9806395c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -269,6 +269,8 @@ static struct net *get_proc_task_net(struct inode *dir)
{
struct task_struct *task;
struct net *net;
+ struct proc_fs_info *fs_info;
+ const struct cred *cred;
guard(rcu)();
task = pid_task(proc_pid(dir), PIDTYPE_PID);
@@ -282,6 +284,15 @@ static struct net *get_proc_task_net(struct inode *dir)
net = get_net(ns->net_ns);
}
+ fs_info = proc_sb_info(dir->i_sb);
+ if (fs_info->pidonly != PROC_PIDONLY_ON)
+ return net;
+
+ cred = rcu_dereference(fs_info->mounter_cred);
+ if (security_capable(cred, net->user_ns, CAP_NET_ADMIN, CAP_OPT_NONE) != 0) {
+ put_net(net);
+ return NULL;
+ }
return net;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d8ca41d823e4..68397900dab7 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -300,11 +300,15 @@ static int proc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
struct proc_fs_info *fs_info = proc_sb_info(sb);
+ const struct cred *cred;
sync_filesystem(sb);
- proc_apply_options(fs_info, fc, current_user_ns());
- return 0;
+ cred = rcu_replace_pointer(fs_info->mounter_cred, get_cred(fc->cred),
+ lockdep_is_held(&sb->s_umount));
+ put_cred(cred);
+
+ return proc_apply_options(sb, fc, current_user_ns());
}
static int proc_get_tree(struct fs_context *fc)
Powered by blists - more mailing lists