[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190617225808.665-28-mathew.j.martineau@linux.intel.com>
Date: Mon, 17 Jun 2019 15:58:02 -0700
From: Mat Martineau <mathew.j.martineau@...ux.intel.com>
To: edumazet@...gle.com, netdev@...r.kernel.org
Cc: Paolo Abeni <pabeni@...hat.com>, cpaasch@...le.com, fw@...len.de,
peter.krystad@...ux.intel.com, dcaratti@...hat.com,
matthieu.baerts@...sares.net
Subject: [RFC PATCH net-next 27/33] mptcp: allow collapsing consecutive sendpages on the same substream
From: Paolo Abeni <pabeni@...hat.com>
If the current sendmsg() lands on the same subflow we used last, we
can try to collapse the data.
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
net/mptcp/protocol.c | 79 +++++++++++++++++++++++++++++++++-----------
1 file changed, 60 insertions(+), 19 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d51201c09519..3fb0f3163743 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -47,12 +47,25 @@ static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
return NULL;
}
+static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
+ const struct sk_buff *skb,
+ const struct mptcp_ext *mpext)
+{
+ if (!tcp_skb_can_collapse_to(skb))
+ return false;
+
+ /* can collapse only if MPTCP level sequence is in order */
+ return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
- struct msghdr *msg, long *timeo)
+ struct msghdr *msg, long *timeo, int *pmss_now,
+ int *ps_goal)
{
+ int mss_now, avail_size, size_goal, ret;
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool collapsed, can_collapse = false;
struct mptcp_ext *mpext = NULL;
- int mss_now, size_goal, ret;
struct page_frag *pfrag;
struct sk_buff *skb;
size_t psize;
@@ -69,8 +82,31 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* compute copy limit */
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
- psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
+ *pmss_now = mss_now;
+ *ps_goal = size_goal;
+ avail_size = size_goal;
+ skb = tcp_write_queue_tail(ssk);
+ if (skb) {
+ mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+
+ /* Limit the write to the size available in the
+ * current skb, if any, so that we create at most one new skb.
+ * If we run out of space in the current skb (e.g. the window
+ * size shrunk from last sent) a new skb will be allocated even
+ * if collapsing was allowed: collapsing is effectively
+ * disabled.
+ */
+ can_collapse = mptcp_skb_can_collapse_to(msk, skb, mpext);
+ if (!can_collapse)
+ TCP_SKB_CB(skb)->eor = 1;
+ else if (size_goal - skb->len > 0)
+ avail_size = size_goal - skb->len;
+ else
+ can_collapse = false;
+ }
+ psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
+ /* Copy to page */
pr_debug("left=%zu", msg_data_left(msg));
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
min_t(size_t, msg_data_left(msg), psize),
@@ -79,14 +115,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (!psize)
return -EINVAL;
- /* Mark the end of the previous write so the beginning of the
- * next write (with its own mptcp skb extension data) is not
- * collapsed.
+ /* tell the TCP stack to delay the push so that we can safely
+ * access the skb after the sendpages call
*/
- skb = tcp_write_queue_tail(ssk);
- if (skb)
- TCP_SKB_CB(skb)->eor = 1;
-
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
@@ -94,13 +125,16 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (unlikely(ret < psize))
iov_iter_revert(&msg->msg_iter, psize - ret);
- if (skb == tcp_write_queue_tail(ssk))
- pr_err("no new skb %p/%p", sk, ssk);
+ collapsed = skb == tcp_write_queue_tail(ssk);
+ BUG_ON(collapsed && !can_collapse);
+ if (collapsed) {
+ /* when collapsing mpext always exists */
+ mpext->data_len += ret;
+ goto out;
+ }
skb = tcp_write_queue_tail(ssk);
-
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
-
if (mpext) {
memset(mpext, 0, sizeof(*mpext));
mpext->data_seq = msk->write_seq;
@@ -113,22 +147,25 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u checksum=%u, dsn64=%d",
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
mpext->checksum, mpext->dsn64);
- } /* TODO: else fallback */
+ }
+ /* TODO: else fallback; allocation can fail, but we can't easily retire
+ * skbs from the write_queue, as we need to roll back TCP status
+ */
+out:
pfrag->offset += ret;
msk->write_seq += ret;
subflow_ctx(ssk)->rel_write_seq += ret;
- tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
return ret;
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
size_t copied = 0;
struct sock *ssk;
- int ret = 0;
long timeo;
pr_debug("msk=%p", msk);
@@ -158,14 +195,18 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(ssk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
while (msg_data_left(msg)) {
- ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
+ ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
+ &size_goal);
if (ret < 0)
break;
copied += ret;
}
- if (copied > 0)
+ if (copied) {
ret = copied;
+ tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
+ size_goal);
+ }
release_sock(ssk);
release_sock(sk);
--
2.22.0
Powered by blists - more mailing lists