lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250507224658.47266-1-kuniyu@amazon.com>
Date: Wed, 7 May 2025 15:45:52 -0700
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <brauner@...nel.org>
CC: <alexander@...alicyn.com>, <bluca@...ian.org>, <daan.j.demeyer@...il.com>,
	<davem@...emloft.net>, <david@...dahead.eu>, <edumazet@...gle.com>,
	<horms@...nel.org>, <jack@...e.cz>, <jannh@...gle.com>, <kuba@...nel.org>,
	<kuniyu@...zon.com>, <lennart@...ttering.net>,
	<linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<me@...dnzj.com>, <netdev@...r.kernel.org>, <oleg@...hat.com>,
	<pabeni@...hat.com>, <viro@...iv.linux.org.uk>, <zbyszek@...waw.pl>
Subject: Re: [PATCH v4 04/11] net: reserve prefix

From: Christian Brauner <brauner@...nel.org>
Date: Wed, 07 May 2025 18:13:37 +0200
> Add the reserved "linuxafsk/" prefix for AF_UNIX sockets and require
> CAP_NET_ADMIN in the owning user namespace of the network namespace to
> bind it. This will be used in next patches to support the coredump
> socket but is a generally useful concept.

I really think we shouldn't reserve an address; it should be
configurable by users via core_pattern, as with the other
coredump types.

AF_UNIX doesn't support SO_REUSEPORT, so once the socket is
dying, the user can't start a new coredump listener until it's
fully cleaned up, which is an unnecessary drawback.

The semantics should be the same as for the other types: all the
coredump service has to do is prepare a file (file, process, socket)
that can receive data and set core_pattern to its name.

Also, the abstract socket is namespaced by design, and there is
no point in enforcing the same restriction in a non-initial netns.


> 
> The collision risk is so low that we can just start using it. Userspace
> must already be prepared to retry if a given abstract address isn't
> usable anyway.
> 
> Signed-off-by: Christian Brauner <brauner@...nel.org>
> ---
>  include/uapi/linux/un.h |  2 ++
>  net/unix/af_unix.c      | 39 +++++++++++++++++++++++++++++++++++----
>  2 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/include/uapi/linux/un.h b/include/uapi/linux/un.h
> index 0ad59dc8b686..bbd5ad508dfa 100644
> --- a/include/uapi/linux/un.h
> +++ b/include/uapi/linux/un.h
> @@ -5,6 +5,8 @@
>  #include <linux/socket.h>
>  
>  #define UNIX_PATH_MAX	108
> +/* reserved AF_UNIX socket namespace. */
> +#define UNIX_SOCKET_NAMESPACE "linuxafsk/"
>  
>  struct sockaddr_un {
>  	__kernel_sa_family_t sun_family; /* AF_UNIX */
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 472f8aa9ea15..148d008862e7 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -114,6 +114,13 @@ static atomic_long_t unix_nr_socks;
>  static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
>  static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
>  
> +static const struct sockaddr_un linuxafsk_addr = {
> +	.sun_family = AF_UNIX,
> +	.sun_path = "\0"UNIX_SOCKET_NAMESPACE,
> +};
> +
> +#define UNIX_SOCKET_NAMESPACE_ADDR_LEN (offsetof(struct sockaddr_un, sun_path) + sizeof(UNIX_SOCKET_NAMESPACE))
> +
>  /* SMP locking strategy:
>   *    hash table is protected with spinlock.
>   *    each socket state is protected by separate spinlock.
> @@ -436,6 +443,30 @@ static struct sock *__unix_find_socket_byname(struct net *net,
>  	return NULL;
>  }
>  
> +static int unix_may_bind_name(struct net *net, struct sockaddr_un *sunname,
> +			      int len, unsigned int hash)
> +{
> +	struct sock *s;
> +
> +	s = __unix_find_socket_byname(net, sunname, len, hash);
> +	if (s)
> +		return -EADDRINUSE;
> +
> +	/*
> +	 * Check whether this is our reserved prefix and if so ensure
> +	 * that only privileged processes can bind it.
> +	 */
> +	if (UNIX_SOCKET_NAMESPACE_ADDR_LEN <= len &&
> +	    !memcmp(&linuxafsk_addr, sunname, UNIX_SOCKET_NAMESPACE_ADDR_LEN)) {
> +		/* Don't bind the namespace itself. */
> +		if (UNIX_SOCKET_NAMESPACE_ADDR_LEN == len)
> +			return -ECONNREFUSED;
> +		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
> +			return -ECONNREFUSED;
> +	}
> +	return 0;
> +}
> +
>  static inline struct sock *unix_find_socket_byname(struct net *net,
>  						   struct sockaddr_un *sunname,
>  						   int len, unsigned int hash)
> @@ -1258,10 +1289,10 @@ static int unix_autobind(struct sock *sk)
>  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
>  	unix_table_double_lock(net, old_hash, new_hash);
>  
> -	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
> +	if (unix_may_bind_name(net, addr->name, addr->len, new_hash)) {
>  		unix_table_double_unlock(net, old_hash, new_hash);
>  
> -		/* __unix_find_socket_byname() may take long time if many names
> +		/* unix_may_bind_name() may take long time if many names
>  		 * are already in use.
>  		 */
>  		cond_resched();
> @@ -1379,7 +1410,8 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
>  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
>  	unix_table_double_lock(net, old_hash, new_hash);
>  
> -	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
> +	err = unix_may_bind_name(net, addr->name, addr->len, new_hash);
> +	if (err)
>  		goto out_spin;
>  
>  	__unix_set_addr_hash(net, sk, addr, new_hash);
> @@ -1389,7 +1421,6 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
>  
>  out_spin:
>  	unix_table_double_unlock(net, old_hash, new_hash);
> -	err = -EADDRINUSE;
>  out_mutex:
>  	mutex_unlock(&u->bindlock);
>  out:
> 
> -- 
> 2.47.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ