lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250516220920.1142578-2-ematsumiya@suse.de>
Date: Fri, 16 May 2025 19:09:19 -0300
From: Enzo Matsumiya <ematsumiya@...e.de>
To: netdev@...r.kernel.org
Cc: Enzo Matsumiya <ematsumiya@...e.de>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Simon Horman <horms@...nel.org>,
	Kuniyuki Iwashima <kuniyu@...zon.com>,
	Willem de Bruijn <willemb@...gle.com>,
	linux-kernel@...r.kernel.org
Subject: [RFC PATCH 1/1] net: socket: hint netns refcounting through @kern arg

Some modules require the netns a socket resides in to be properly
refcounted to avoid UAF (e.g. netns is gone before the socket, socket
goes away before TCP timers kick in).

Such refcounting is done based on sk->sk_net_refcnt, which, in turn, is
set based on @kern arg -- kernel sockets are not netns refcounted by
default.

In order to deal with that, modules are allocating a kernel socket, and,
right after, calling sk_net_refcnt_upgrade() (which sets sk->sk_net_refcnt
to 1 and does the proper setup for the netns refcounter).

This patch aims to centralize this behaviour in sk_alloc() by changing
the @kern arg to accept newly added SOCK_NETNS_REFCNT_* values.

Practically it only adds a third value which means "kernel socket with
netns refcounting".

To maintain compatibility with the previous boolean behaviour
(@kern/!@kern), SOCK_NETNS_REFCNT_USER is 0 and
SOCK_NETNS_REFCNT_KERN_* > 0.

Also add a sock_create_netns() wrapper.  Callers that need a kernel
socket with netns refcounting may use this wrapper.

A follow-up patch will update callers of sk_net_refcnt_upgrade()
to use this new option instead.

Signed-off-by: Enzo Matsumiya <ematsumiya@...e.de>
---
 include/linux/net.h | 15 +++++++++++++++
 net/core/sock.c     | 10 ++++++----
 net/socket.c        | 27 +++++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..bf5e2e68cee5 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -247,6 +247,20 @@ enum {
 	SOCK_WAKE_URG,
 };
 
+/*
+ * sock netns refcounting modes:
+ *
+ * @SOCK_NETNS_REFCNT_USER: user sockets always hold a netns reference
+ * @SOCK_NETNS_REFCNT_KERN_NONE: kernel socket will not hold active netns reference
+ * @SOCK_NETNS_REFCNT_KERN_ANY: kernel socket will hold active reference for any netns
+ *				(but init_net)
+ */
+enum {
+	SOCK_NETNS_REFCNT_USER,
+	SOCK_NETNS_REFCNT_KERN_NONE,
+	SOCK_NETNS_REFCNT_KERN_ANY,
+};
+
 int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
@@ -255,6 +269,7 @@ int __sock_create(struct net *net, int family, int type, int proto,
 		  struct socket **res, int kern);
 int sock_create(int family, int type, int proto, struct socket **res);
 int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
+int sock_create_netns(struct net *net, int family, int type, int protocol, struct socket **res);
 int sock_create_lite(int family, int type, int proto, struct socket **res);
 struct socket *sock_alloc(void);
 void sock_release(struct socket *sock);
diff --git a/net/core/sock.c b/net/core/sock.c
index e54449c9ab0b..1b987d47e4d8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2244,7 +2244,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
  *	@family: protocol family
  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
  *	@prot: struct proto associated with this new sock instance
- *	@kern: is this to be a kernel socket?
+ *	@kern: hint for netns refcounting (%SOCK_NETNS_REFCNT_USER, ...)
  */
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		      struct proto *prot, int kern)
@@ -2259,13 +2259,15 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		 * why we need sk_prot_creator -acme
 		 */
 		sk->sk_prot = sk->sk_prot_creator = prot;
-		sk->sk_kern_sock = kern;
+		sk->sk_kern_sock = !!kern;
 		sock_lock_init(sk);
-		sk->sk_net_refcnt = kern ? 0 : 1;
-		if (likely(sk->sk_net_refcnt)) {
+		if (likely(kern == SOCK_NETNS_REFCNT_USER) ||
+		    (kern == SOCK_NETNS_REFCNT_KERN_ANY && !net_eq(net, &init_net))) {
+			sk->sk_net_refcnt = 1;
 			get_net_track(net, &sk->ns_tracker, priority);
 			sock_inuse_add(net, 1);
 		} else {
+			sk->sk_net_refcnt = 0;
 			net_passive_inc(net);
 			__netns_tracker_alloc(net, &sk->ns_tracker,
 					      false, priority);
diff --git a/net/socket.c b/net/socket.c
index 9a0e720f0859..9cce213b3fc2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1459,11 +1459,12 @@ EXPORT_SYMBOL(sock_wake_async);
  *	@type: communication type (SOCK_STREAM, ...)
  *	@protocol: protocol (0, ...)
  *	@res: new socket
- *	@kern: boolean for kernel space sockets
+ *	@kern: hint for netns refcounting (%SOCK_NETNS_REFCNT_USER, ...)
  *
  *	Creates a new socket and assigns it to @res, passing through LSM.
  *	Returns 0 or an error. On failure @res is set to %NULL. @kern must
- *	be set to true if the socket resides in kernel space.
+ *	be set to %SOCK_NETNS_REFCNT_* -- handled as boolean in most places,
+ *	effectively handled only in sk_alloc().
  *	This function internally uses GFP_KERNEL.
  */
 
@@ -1609,6 +1610,7 @@ EXPORT_SYMBOL(sock_create);
  *	@res: new socket
  *
  *	A wrapper around __sock_create().
+ *	Created socket will not hold a reference on @net.
  *	Returns 0 or an error. This function internally uses GFP_KERNEL.
  */
 
@@ -1618,6 +1620,27 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
 }
 EXPORT_SYMBOL(sock_create_kern);
 
+/**
+ *	sock_create_netns - creates a socket (kernel space)
+ *	@net: net namespace
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	A wrapper around __sock_create().
+ *	If @net == %init_net (checked in sk_alloc), created socket will
+ *	not hold a reference on @net (i.e. same as sock_create_kern).
+ *	Otherwise, created socket will hold a reference on @net.
+ *	Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
+int sock_create_netns(struct net *net, int family, int type, int protocol, struct socket **res)
+{
+	return __sock_create(net, family, type, protocol, res, SOCK_NETNS_REFCNT_KERN_ANY);
+}
+EXPORT_SYMBOL(sock_create_netns);
+
 static struct socket *__sys_socket_create(int family, int type, int protocol)
 {
 	struct socket *sock;
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ