lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1516827819-133806-1-git-send-email-sowmini.varadhan@oracle.com>
Date:   Wed, 24 Jan 2018 13:03:39 -0800
From:   Sowmini Varadhan <sowmini.varadhan@...cle.com>
To:     sowmini.varadhan@...cle.com
Cc:     davem@...emloft.net, sowmini.varadhan@...cle.com,
        santosh.shilimkar@...cle.com
Subject: [PATCH net-next] rds: tcp: per-netns flag to stop new connection creation when rds-tcp is being dismantled

An rds_connection can get added during netns deletion between lines 528
and 529 of

  506 static void rds_tcp_kill_sock(struct net *net)
  :
  /* code to pull out all the rds_connections that should be destroyed */
  :
  528         spin_unlock_irq(&rds_tcp_conn_lock);
  529         list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
  530                 rds_conn_destroy(tc->t_cpath->cp_conn);

Such an rds_connection would miss out the rds_conn_destroy()
loop (that cancels all pending work) and (if it was scheduled
after netns deletion) could trigger the use-after-free.

A similar race-window exists for the module unload path
in rds_tcp_exit -> rds_tcp_destroy_conns

To avoid the addition of new rds_connections during kill_sock
or netns_delete, this patch introduces a per-netns flag,
RTN_DELETE_PENDING, that will cause RDS connection creation to fail.
RCU is used to make sure that we wait for the critical
section of __rds_conn_create threads (that may have started before
the setting of RTN_DELETE_PENDING) to complete before starting
the connection destruction.

Reported-by: syzbot+bbd8e9a06452cc48059b@...kaller.appspotmail.com
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
---
 net/rds/connection.c |    3 ++
 net/rds/tcp.c        |   82 ++++++++++++++++++++++++++++++++-----------------
 net/rds/tcp.h        |    1 +
 3 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index b10c0ef..2ae539d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -220,8 +220,10 @@ static void __rds_conn_path_init(struct rds_connection *conn,
 				     is_outgoing);
 		conn->c_path[i].cp_index = i;
 	}
+	rcu_read_lock();
 	ret = trans->conn_alloc(conn, gfp);
 	if (ret) {
+		rcu_read_unlock();
 		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
@@ -283,6 +285,7 @@ static void __rds_conn_path_init(struct rds_connection *conn,
 		}
 	}
 	spin_unlock_irqrestore(&rds_conn_lock, flags);
+	rcu_read_unlock();
 
 out:
 	return conn;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 9920d2f..2bdd3cc 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -274,14 +274,13 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 static void rds_tcp_conn_free(void *arg)
 {
 	struct rds_tcp_connection *tc = arg;
-	unsigned long flags;
 
 	rdsdebug("freeing tc %p\n", tc);
 
-	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+	spin_lock_bh(&rds_tcp_conn_lock);
 	if (!tc->t_tcp_node_detached)
 		list_del(&tc->t_tcp_node);
-	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+	spin_unlock_bh(&rds_tcp_conn_lock);
 
 	kmem_cache_free(rds_tcp_conn_slab, tc);
 }
@@ -296,7 +295,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
 		if (!tc) {
 			ret = -ENOMEM;
-			break;
+			goto fail;
 		}
 		mutex_init(&tc->t_conn_path_lock);
 		tc->t_sock = NULL;
@@ -306,14 +305,25 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 
 		conn->c_path[i].cp_transport_data = tc;
 		tc->t_cpath = &conn->c_path[i];
+		tc->t_tcp_node_detached = true;
 
-		spin_lock_irq(&rds_tcp_conn_lock);
-		tc->t_tcp_node_detached = false;
-		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
-		spin_unlock_irq(&rds_tcp_conn_lock);
 		rdsdebug("rds_conn_path [%d] tc %p\n", i,
 			 conn->c_path[i].cp_transport_data);
 	}
+	spin_lock_bh(&rds_tcp_conn_lock);
+	if (rds_tcp_netns_delete_pending(rds_conn_net(conn))) {
+		rdsdebug("RTN_DELETE_PENDING\n");
+		ret = -ENETDOWN;
+		spin_unlock_bh(&rds_tcp_conn_lock);
+		goto fail;
+	}
+	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+		tc = conn->c_path[i].cp_transport_data;
+		tc->t_tcp_node_detached = false;
+		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+	}
+	spin_unlock_bh(&rds_tcp_conn_lock);
+fail:
 	if (ret) {
 		for (j = 0; j < i; j++)
 			rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
@@ -332,23 +342,6 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
 	return false;
 }
 
-static void rds_tcp_destroy_conns(void)
-{
-	struct rds_tcp_connection *tc, *_tc;
-	LIST_HEAD(tmp_list);
-
-	/* avoid calling conn_destroy with irqs off */
-	spin_lock_irq(&rds_tcp_conn_lock);
-	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
-			list_move_tail(&tc->t_tcp_node, &tmp_list);
-	}
-	spin_unlock_irq(&rds_tcp_conn_lock);
-
-	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
-		rds_conn_destroy(tc->t_cpath->cp_conn);
-}
-
 static void rds_tcp_exit(void);
 
 struct rds_transport rds_tcp_transport = {
@@ -382,8 +375,30 @@ struct rds_tcp_net {
 	struct ctl_table *ctl_table;
 	int sndbuf_size;
 	int rcvbuf_size;
+	unsigned long  rtn_flags;
+#define	RTN_DELETE_PENDING	0
 };
 
+static void rds_tcp_destroy_conns(void)
+{
+	struct rds_tcp_connection *tc, *_tc;
+	struct rds_tcp_net *rtn = net_generic(&init_net, rds_tcp_netid);
+	LIST_HEAD(tmp_list);
+
+	/* avoid calling conn_destroy with irqs off */
+	set_bit(RTN_DELETE_PENDING, &rtn->rtn_flags);
+	synchronize_rcu();
+	spin_lock_bh(&rds_tcp_conn_lock);
+	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+			list_move_tail(&tc->t_tcp_node, &tmp_list);
+	}
+	spin_unlock_bh(&rds_tcp_conn_lock);
+
+	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
+		rds_conn_destroy(tc->t_cpath->cp_conn);
+}
+
 /* All module specific customizations to the RDS-TCP socket should be done in
  * rds_tcp_tune() and applied after socket creation.
  */
@@ -504,6 +519,13 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
 	.size = sizeof(struct rds_tcp_net),
 };
 
+bool rds_tcp_netns_delete_pending(struct net *net)
+{
+	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+	return test_bit(RTN_DELETE_PENDING, &rtn->rtn_flags);
+}
+
 static void rds_tcp_kill_sock(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -513,7 +535,9 @@ static void rds_tcp_kill_sock(struct net *net)
 
 	rtn->rds_tcp_listen_sock = NULL;
 	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-	spin_lock_irq(&rds_tcp_conn_lock);
+	set_bit(RTN_DELETE_PENDING, &rtn->rtn_flags);
+	synchronize_rcu();
+	spin_lock_bh(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -526,7 +550,7 @@ static void rds_tcp_kill_sock(struct net *net)
 			tc->t_tcp_node_detached = true;
 		}
 	}
-	spin_unlock_irq(&rds_tcp_conn_lock);
+	spin_unlock_bh(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
 		rds_conn_destroy(tc->t_cpath->cp_conn);
 }
@@ -574,7 +598,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
 
-	spin_lock_irq(&rds_tcp_conn_lock);
+	spin_lock_bh(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -584,7 +608,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 		/* reconnect with new parameters */
 		rds_conn_path_drop(tc->t_cpath, false);
 	}
-	spin_unlock_irq(&rds_tcp_conn_lock);
+	spin_unlock_bh(&rds_tcp_conn_lock);
 }
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index c6fa080..b07dbd7 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -60,6 +60,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
 u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
 extern struct rds_transport rds_tcp_transport;
 void rds_tcp_accept_work(struct sock *sk);
+bool rds_tcp_netns_delete_pending(struct net *net);
 
 /* tcp_connect.c */
 int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
-- 
1.7.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ