linux-kernel - [PATCH 3.17 012/319] netlink: Re-add locking to netlink_lookup() and seq walker

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20141112010954.703530885@linuxfoundation.org>
Date:	Wed, 12 Nov 2014 10:12:30 +0900
From:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To:	linux-kernel@...r.kernel.org
Cc:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	stable@...r.kernel.org, "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <eric.dumazet@...il.com>,
	"Steinar H. Gunderson" <sgunderson@...foot.com>,
	Thomas Graf <tgraf@...g.ch>
Subject: [PATCH 3.17 012/319] netlink: Re-add locking to netlink_lookup() and seq walker

3.17-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Thomas Graf <tgraf@...g.ch>

[ Upstream commit 78fd1d0ab072d4d9b5f0b7c14a1516665170b565 ]

The synchronize_rcu() in netlink_release() introduces unacceptable
latency. Reintroduce minimal locking around lookup and the seq walker
so we can drop the synchronize_rcu() until socket destruction has been
RCUfied.

Cc: David S. Miller <davem@...emloft.net>
Cc: Eric Dumazet <eric.dumazet@...il.com>
Reported-by: Steinar H. Gunderson <sgunderson@...foot.com>
Reported-and-tested-by: Heiko Carstens <heiko.carstens@...ibm.com>
Signed-off-by: Thomas Graf <tgraf@...g.ch>
Signed-off-by: David S. Miller <davem@...emloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
---
 net/netlink/af_netlink.c |   37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_
 static int netlink_dump(struct sock *sk);
 static void netlink_skb_destructor(struct sk_buff *skb);
 
+/* nl_table locking explained:
+ * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
+ * combined with an RCU read-side lock. Insertion and removal are protected
+ * with nl_sk_hash_lock while using RCU list modification primitives and may
+ * run in parallel to nl_table_lock protected lookups. Destruction of the
+ * Netlink socket may only occur *after* nl_table_lock has been acquired
+ * either during or after the socket has been removed from the list.
+ */
 DEFINE_RWLOCK(nl_table_lock);
 EXPORT_SYMBOL_GPL(nl_table_lock);
 static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -109,10 +117,10 @@ EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
 static int lockdep_nl_sk_hash_is_held(void)
 {
 #ifdef CONFIG_LOCKDEP
-	return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1;
-#else
-	return 1;
+	if (debug_locks)
+		return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
 #endif
+	return 1;
 }
 
 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
@@ -1028,11 +1036,13 @@ static struct sock *netlink_lookup(struc
 	struct netlink_table *table = &nl_table[protocol];
 	struct sock *sk;
 
+	read_lock(&nl_table_lock);
 	rcu_read_lock();
 	sk = __netlink_lookup(table, portid, net);
 	if (sk)
 		sock_hold(sk);
 	rcu_read_unlock();
+	read_unlock(&nl_table_lock);
 
 	return sk;
 }
@@ -1257,9 +1267,6 @@ static int netlink_release(struct socket
 	}
 	netlink_table_ungrab();
 
-	/* Wait for readers to complete */
-	synchronize_net();
-
 	kfree(nlk->groups);
 	nlk->groups = NULL;
 
@@ -1281,6 +1288,7 @@ static int netlink_autobind(struct socke
 
 retry:
 	cond_resched();
+	netlink_table_grab();
 	rcu_read_lock();
 	if (__netlink_lookup(table, portid, net)) {
 		/* Bind collision, search negative portid values. */
@@ -1288,9 +1296,11 @@ retry:
 		if (rover > -4097)
 			rover = -4097;
 		rcu_read_unlock();
+		netlink_table_ungrab();
 		goto retry;
 	}
 	rcu_read_unlock();
+	netlink_table_ungrab();
 
 	err = netlink_insert(sk, net, portid);
 	if (err == -EADDRINUSE)
@@ -2921,14 +2931,16 @@ static struct sock *netlink_seq_socket_i
 }
 
 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
+	__acquires(nl_table_lock) __acquires(RCU)
 {
+	read_lock(&nl_table_lock);
 	rcu_read_lock();
 	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct rhashtable *ht;
 	struct netlink_sock *nlk;
 	struct nl_seq_iter *iter;
 	struct net *net;
@@ -2943,19 +2955,19 @@ static void *netlink_seq_next(struct seq
 	iter = seq->private;
 	nlk = v;
 
-	rht_for_each_entry_rcu(nlk, nlk->node.next, node)
+	i = iter->link;
+	ht = &nl_table[i].hash;
+	rht_for_each_entry(nlk, nlk->node.next, ht, node)
 		if (net_eq(sock_net((struct sock *)nlk), net))
 			return nlk;
 
-	i = iter->link;
 	j = iter->hash_idx + 1;
 
 	do {
-		struct rhashtable *ht = &nl_table[i].hash;
 		const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
 
 		for (; j < tbl->size; j++) {
-			rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+			rht_for_each_entry(nlk, tbl->buckets[j], ht, node) {
 				if (net_eq(sock_net((struct sock *)nlk), net)) {
 					iter->link = i;
 					iter->hash_idx = j;
@@ -2971,9 +2983,10 @@ static void *netlink_seq_next(struct seq
 }
 
 static void netlink_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
+	__releases(RCU) __releases(nl_table_lock)
 {
 	rcu_read_unlock();
+	read_unlock(&nl_table_lock);
 }
 
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/