netdev - Re: [PATCH] net: fix lock_sock_bh/unlock_sock_bh

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 27 May 2010 07:20:18 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	David Miller <davem@...emloft.net>
Cc:	anton@...ba.org, netdev@...r.kernel.org
Subject: Re: [PATCH] net: fix lock_sock_bh/unlock_sock_bh

Le jeudi 27 mai 2010 à 07:06 +0200, Eric Dumazet a écrit :
> Le mercredi 26 mai 2010 à 21:21 -0700, David Miller a écrit :
> > From: Eric Dumazet <eric.dumazet@...il.com>
> > Date: Thu, 27 May 2010 06:18:46 +0200
> > 
> > > Process context :
> > > 
> > > lock only (sk_lock.slock = locked, sk_lock.owned = ???)
> > > 
> > > So I should add a test on owned. If set (by another thread), we should take the slow path.
> > 
> > Indeed, what you're doing now is broken.
> > 
> > If owned is non-zero when you take the spinlock you have to queue your
> > work, just like how we toss packets into the socket backlog, which is
> > processed by release_sock(), when this happens.
> 
> Here is the patch I am going to test today.
> 
> Anton, could you please test it too, just for sure ?
> 
> Thanks !
> 
> 
> [PATCH] net: fix lock_sock_bh/unlock_sock_bh
> 
> This new sock lock primitive was introduced to speedup some user context
> socket manipulation. But it is unsafe to protect two threads, one using
> regular lock_sock/release_sock, one using lock_sock_bh/unlock_sock_bh
> 
> This patch changes lock_sock_bh to be careful against 'owned' state.
> If owned is found to be set, we must take the slow path.
> lock_sock_bh() now returns a boolean to say if the slow path was taken,
> and this boolean is used at unlock_sock_bh time to call the appropriate
> unlock function.
> 
> After this change, BH are either disabled or enabled during the
> lock_sock_bh/unlock_sock_bh protected section. This might be misleading,
> so we rename these functions to lock_sock_fast()/unlock_sock_fast().
> 
> Reported-by: Anton Blanchard <anton@...ba.org>
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> ---
>  include/net/sock.h  |   20 ++++++++++++++------
>  net/core/datagram.c |    6 ++++--
>  net/core/sock.c     |   32 ++++++++++++++++++++++++++++++++
>  net/ipv4/udp.c      |   14 ++++++++------
>  net/ipv6/udp.c      |    5 +++--
>  5 files changed, 61 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index d2a71b0..ca241ea 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1026,15 +1026,23 @@ extern void release_sock(struct sock *sk);
>  				SINGLE_DEPTH_NESTING)
>  #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
>  
> -static inline void lock_sock_bh(struct sock *sk)
> +extern bool lock_sock_fast(struct sock *sk);
> +/**
> + * unlock_sock_fast - complement of lock_sock_fast
> + * @sk: socket
> + * @slow: slow mode
> + *
> + * fast unlock socket for user context.
> + * If slow mode is on, we call regular release_sock()
> + */
> +static inline void unlock_sock_fast(struct sock *sk, bool slow)
>  {
> -	spin_lock_bh(&sk->sk_lock.slock);
> +	if (slow)
> +		release_sock(sk);
> +	else
> +		spin_unlock_bh(&sk->sk_lock.slock);
>  }
>  
> -static inline void unlock_sock_bh(struct sock *sk)
> -{
> -	spin_unlock_bh(&sk->sk_lock.slock);
> -}
>  
>  extern struct sock		*sk_alloc(struct net *net, int family,
>  					  gfp_t priority,
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index e009753..f5b6f43 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
>  
>  void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
>  {
> +	bool slow;
> +
>  	if (likely(atomic_read(&skb->users) == 1))
>  		smp_rmb();
>  	else if (likely(!atomic_dec_and_test(&skb->users)))
>  		return;
>  
> -	lock_sock_bh(sk);
> +	slow = lock_sock_fast(sk);
>  	skb_orphan(skb);
>  	sk_mem_reclaim_partial(sk);
> -	unlock_sock_bh(sk);
> +	unlock_sock_fast(sk, slow);
>  
>  	/* skb is now orphaned, can be freed outside of locked section */
>  	__kfree_skb(skb);
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 37fe9b6..7ab6398 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2007,6 +2007,38 @@ void release_sock(struct sock *sk)
>  }
>  EXPORT_SYMBOL(release_sock);
>  
> +/**
> + * lock_sock_fast - fast version of lock_sock
> + * @sk: socket
> + *
> + * This version should be used for very small section, where process wont block
> + * return false if fast path is taken
> + *   sk_lock.slock locked, owned = 0, BH disabled
> + * return true if slow path is taken
> + *   sk_lock.slock unlocked, owned = 1, BH enabled
> + */
> +bool lock_sock_fast(struct sock *sk)
> +{
> +	might_sleep();
> +	spin_lock_bh(&sk->sk_lock.slock);
> +
> +	if (!sk->sk_lock.owned)
> +		/*
> +		 * Note : We must disable BH or risk a deadlock
> +		 */
> +		return false;
> +
> +	__lock_sock(sk);
> +	sk->sk_lock.owned = 1;
> +	spin_unlock(&sk->sk_lock.slock);
> +	/*
> +	 * The sk_lock has mutex_lock() semantics here:
> +	 */
> +	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);

Oops, I missed a local_bh_enable() call here.

> +	return true;
> +}
> +EXPORT_SYMBOL(lock_sock_fast);
> +
>  int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
>  {
>  	struct timeval tv;

Here is v2 of the patch, with local_bh_enable() added in lock_sock_fast().

[PATCH v2] net: fix lock_sock_bh/unlock_sock_bh

This new sock lock primitive was introduced to speed up some user context
socket manipulation. But it is unsafe when two threads are involved, one using
regular lock_sock/release_sock and the other using lock_sock_bh/unlock_sock_bh.

This patch changes lock_sock_bh to be careful against 'owned' state.
If owned is found to be set, we must take the slow path.
lock_sock_bh() now returns a boolean to say if the slow path was taken,
and this boolean is used at unlock_sock_bh time to call the appropriate
unlock function.

After this change, BH may be either disabled or enabled during the
lock_sock_bh/unlock_sock_bh protected section. This might be misleading,
so we rename these functions to lock_sock_fast()/unlock_sock_fast().

Reported-by: Anton Blanchard <anton@...ba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
 include/net/sock.h  |   20 ++++++++++++++------
 net/core/datagram.c |    6 ++++--
 net/core/sock.c     |   33 +++++++++++++++++++++++++++++++++
 net/ipv4/udp.c      |   14 ++++++++------
 net/ipv6/udp.c      |    5 +++--
 5 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index d2a71b0..ca241ea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,15 +1026,23 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
-static inline void lock_sock_bh(struct sock *sk)
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
 {
-	spin_lock_bh(&sk->sk_lock.slock);
+	if (slow)
+		release_sock(sk);
+	else
+		spin_unlock_bh(&sk->sk_lock.slock);
 }
 
-static inline void unlock_sock_bh(struct sock *sk)
-{
-	spin_unlock_bh(&sk->sk_lock.slock);
-}
 
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e009753..f5b6f43 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37fe9b6..2cf7f9f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small sections, where process won't block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a69..b9d0d40 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock_bh(sk);
+		bool slow = lock_sock_fast(sk);
+
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		unlock_sock_bh(sk);
+		unlock_sock_fast(sk, slow);
 	}
 	return res;
 }
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
+	bool slow;
 
 	/*
 	 *	Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock_bh(sk);
+	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0..87be586 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int is_udp4;
+	bool slow;
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
 	}
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html