lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  2 Oct 2019 16:36:44 -0700
From:   Mat Martineau <mathew.j.martineau@...ux.intel.com>
To:     netdev@...r.kernel.org, edumazet@...gle.com
Cc:     Mat Martineau <mathew.j.martineau@...ux.intel.com>,
        cpaasch@...le.com, fw@...len.de, pabeni@...hat.com,
        peter.krystad@...ux.intel.com, dcaratti@...hat.com,
        matthieu.baerts@...sares.net
Subject: [RFC PATCH v2 34/45] mptcp: Make MPTCP socket block/wakeup ignore sk_receive_queue

The MPTCP-level socket doesn't use sk_receive_queue, so it was possible
for mptcp_recvmsg() to remain blocked when there was data ready for it
to read. When the MPTCP socket is waiting for additional data and it
releases the subflow socket lock, the subflow may have incoming packets
ready to process, and it sometimes calls subflow_data_ready() before the
MPTCP socket calls sk_wait_data().

This change adds a new function for the MPTCP socket to use when waiting
for a data ready signal. Atomic bitops with memory barriers are used to
set, test, and clear an MPTCP socket flag that indicates waiting subflow
data. This flag replaces the sk_receive_queue checks used by other
socket types.

Signed-off-by: Mat Martineau <mathew.j.martineau@...ux.intel.com>
---
 net/mptcp/protocol.c | 31 ++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  4 ++++
 net/mptcp/subflow.c  |  5 +++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 445800eae767..c8ee20963887 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -367,6 +367,31 @@ static enum mapping_status mptcp_get_mapping(struct sock *ssk)
 	return ret;
 }
 
+/* Sleep for a wakeup unless MPTCP_DATA_READY is already pending; updates *timeo */
+static void mptcp_wait_data(struct sock *sk, long *timeo)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	int data_ready;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	release_sock(sk);
+
+	/* Value-returning bitop: fully ordered, no explicit barriers needed. */
+	data_ready = test_and_clear_bit(MPTCP_DATA_READY, &msk->flags);
+
+	if (!data_ready)
+		*timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
+
+	sched_annotate_sleep();
+	lock_sock(sk);
+
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+}
+
 static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
 	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
@@ -423,6 +448,10 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		u64 old_ack;
 		u32 ssn;
 
+		smp_mb__before_atomic();
+		clear_bit(MPTCP_DATA_READY, &msk->flags);
+		smp_mb__after_atomic();
+
 		status = mptcp_get_mapping(ssk);
 
 		if (status == MAPPING_ADDED) {
@@ -550,7 +579,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		pr_debug("block");
 		release_sock(ssk);
-		sk_wait_data(sk, &timeo, NULL);
+		mptcp_wait_data(sk, &timeo);
 		lock_sock(ssk);
 	}
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 4a1171b75ec6..56df4f46f313 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -74,6 +74,9 @@
 #define MPTCP_ADDR_IPVERSION_4	4
 #define MPTCP_ADDR_IPVERSION_6	6
 
+/* MPTCP socket flags: bit numbers for bitops on mptcp_sock.flags */
+#define MPTCP_DATA_READY	0
+
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 {
 	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
@@ -117,6 +120,7 @@ struct mptcp_sock {
 	u64		write_seq;
 	u64		ack_seq;
 	u32		token;
+	unsigned long	flags;
 	u16		dport;
 	struct list_head conn_list;
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 257e52d9595e..7a94049587cc 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -311,6 +311,11 @@ static void subflow_data_ready(struct sock *sk)
 
 	if (parent) {
 		pr_debug("parent=%p", parent);
+
+		smp_mb__before_atomic();
+		set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);
+		smp_mb__after_atomic();
+
 		parent->sk_data_ready(parent);
 	}
 }
-- 
2.23.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ