[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250507-work-coredump-socket-v4-4-af0ef317b2d0@kernel.org>
Date: Wed, 07 May 2025 18:13:37 +0200
From: Christian Brauner <brauner@...nel.org>
To: Kuniyuki Iwashima <kuniyu@...zon.com>, linux-fsdevel@...r.kernel.org,
Jann Horn <jannh@...gle.com>
Cc: Eric Dumazet <edumazet@...gle.com>, Oleg Nesterov <oleg@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Alexander Viro <viro@...iv.linux.org.uk>,
Daan De Meyer <daan.j.demeyer@...il.com>,
David Rheinsberg <david@...dahead.eu>, Jakub Kicinski <kuba@...nel.org>,
Jan Kara <jack@...e.cz>, Lennart Poettering <lennart@...ttering.net>,
Luca Boccassi <bluca@...ian.org>, Mike Yuan <me@...dnzj.com>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
Christian Brauner <brauner@...nel.org>,
Alexander Mikhalitsyn <alexander@...alicyn.com>
Subject: [PATCH v4 04/11] net: reserve prefix
Add the reserved "linuxafsk/" prefix for AF_UNIX sockets and require
CAP_NET_ADMIN in the owning user namespace of the network namespace to
bind it. This will be used in subsequent patches to support the coredump
socket, but it is a generally useful concept.
The collision risk is so low that we can just start using it. Userspace
must already be prepared to retry if a given abstract address isn't
usable anyway.
Signed-off-by: Christian Brauner <brauner@...nel.org>
---
include/uapi/linux/un.h | 2 ++
net/unix/af_unix.c | 39 +++++++++++++++++++++++++++++++++++----
2 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/un.h b/include/uapi/linux/un.h
index 0ad59dc8b686..bbd5ad508dfa 100644
--- a/include/uapi/linux/un.h
+++ b/include/uapi/linux/un.h
@@ -5,6 +5,8 @@
#include <linux/socket.h>
#define UNIX_PATH_MAX 108
+/* reserved AF_UNIX socket namespace. */
+#define UNIX_SOCKET_NAMESPACE "linuxafsk/"
struct sockaddr_un {
__kernel_sa_family_t sun_family; /* AF_UNIX */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 472f8aa9ea15..148d008862e7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,13 @@ static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
+static const struct sockaddr_un linuxafsk_addr = {
+ .sun_family = AF_UNIX,
+ .sun_path = "\0"UNIX_SOCKET_NAMESPACE,
+};
+
+#define UNIX_SOCKET_NAMESPACE_ADDR_LEN (offsetof(struct sockaddr_un, sun_path) + sizeof(UNIX_SOCKET_NAMESPACE))
+
/* SMP locking strategy:
* hash table is protected with spinlock.
* each socket state is protected by separate spinlock.
@@ -436,6 +443,30 @@ static struct sock *__unix_find_socket_byname(struct net *net,
return NULL;
}
+static int unix_may_bind_name(struct net *net, struct sockaddr_un *sunname,
+ int len, unsigned int hash)
+{
+ struct sock *s;
+
+ s = __unix_find_socket_byname(net, sunname, len, hash);
+ if (s)
+ return -EADDRINUSE;
+
+ /*
+ * Check whether this is our reserved prefix and if so ensure
+ * that only privileged processes can bind it.
+ */
+ if (UNIX_SOCKET_NAMESPACE_ADDR_LEN <= len &&
+ !memcmp(&linuxafsk_addr, sunname, UNIX_SOCKET_NAMESPACE_ADDR_LEN)) {
+ /* Don't bind the namespace itself. */
+ if (UNIX_SOCKET_NAMESPACE_ADDR_LEN == len)
+ return -ECONNREFUSED;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -ECONNREFUSED;
+ }
+ return 0;
+}
+
static inline struct sock *unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
int len, unsigned int hash)
@@ -1258,10 +1289,10 @@ static int unix_autobind(struct sock *sk)
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
+ if (unix_may_bind_name(net, addr->name, addr->len, new_hash)) {
unix_table_double_unlock(net, old_hash, new_hash);
- /* __unix_find_socket_byname() may take long time if many names
+ /* unix_may_bind_name() may take long time if many names
* are already in use.
*/
cond_resched();
@@ -1379,7 +1410,8 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
+ err = unix_may_bind_name(net, addr->name, addr->len, new_hash);
+ if (err)
goto out_spin;
__unix_set_addr_hash(net, sk, addr, new_hash);
@@ -1389,7 +1421,6 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
out_spin:
unix_table_double_unlock(net, old_hash, new_hash);
- err = -EADDRINUSE;
out_mutex:
mutex_unlock(&u->bindlock);
out:
--
2.47.2
Powered by blists - more mailing lists