[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20191002233655.24323-40-mathew.j.martineau@linux.intel.com>
Date: Wed, 2 Oct 2019 16:36:49 -0700
From: Mat Martineau <mathew.j.martineau@...ux.intel.com>
To: netdev@...r.kernel.org, edumazet@...gle.com
Cc: Paolo Abeni <pabeni@...hat.com>, cpaasch@...le.com, fw@...len.de,
peter.krystad@...ux.intel.com, dcaratti@...hat.com,
matthieu.baerts@...sares.net
Subject: [RFC PATCH v2 39/45] mptcp: rework mptcp_sendmsg_frag to accept optional dfrag
From: Paolo Abeni <pabeni@...hat.com>
This will simplify mptcp-level retransmission implementation
in the next patch. If dfrag is provided by the caller, skip
kernel space memory allocation and use data and metadata
provided by the dfrag itself.
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
net/mptcp/protocol.c | 133 +++++++++++++++++++++++++------------------
1 file changed, 78 insertions(+), 55 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8319ee807481..6366dfd65aa8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -95,7 +95,7 @@ static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
return NULL;
}
-static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
+static inline bool mptcp_skb_can_collapse_to(u64 write_seq,
const struct sk_buff *skb,
const struct mptcp_ext *mpext)
{
@@ -103,7 +103,7 @@ static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
return false;
/* can collapse only if MPTCP level sequence is in order */
- return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
+ return mpext && mpext->data_seq + mpext->data_len == write_seq;
}
static inline bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
@@ -168,31 +168,44 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
}
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
- struct msghdr *msg, long *timeo, int *pmss_now,
+ struct msghdr *msg, struct mptcp_data_frag *dfrag,
+ long *timeo, int *pmss_now,
int *ps_goal)
{
int mss_now, avail_size, size_goal, offset, ret, frag_truesize = 0;
bool dfrag_collapsed, collapsed, can_collapse = false;
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_ext *mpext = NULL;
- struct mptcp_data_frag *dfrag;
+ bool retransmission = !!dfrag;
struct page_frag *pfrag;
struct sk_buff *skb;
+ struct page *page;
+ u64 *write_seq;
size_t psize;
+ int *poffset;
/* use the mptcp page cache so that we can easily move the data
* from one substream to another, but do per subflow memory accounting
+ * Note: pfrag is used only !retransmission, but the compiler if
+ * fooled into a warning if we don't init here
*/
pfrag = sk_page_frag(sk);
- while (!mptcp_page_frag_refill(ssk, pfrag)) {
- ret = sk_stream_wait_memory(ssk, timeo);
- if (ret)
- return ret;
-
- /* id sk_stream_wait_memory() sleeps snd_una can change
- * significantly, refresh the rtx queue
- */
- mptcp_clean_una(sk);
+ if (!retransmission) {
+ while (!mptcp_page_frag_refill(ssk, pfrag)) {
+ ret = sk_stream_wait_memory(ssk, timeo);
+ if (ret)
+ return ret;
+
+ /* id sk_stream_wait_memory() sleeps snd_una can change
+ * significantly, refresh the rtx queue
+ */
+ mptcp_clean_una(sk);
+ }
+ write_seq = &msk->write_seq;
+ page = pfrag->page;
+ } else {
+ write_seq = &dfrag->data_seq;
+ page = dfrag->page;
}
/* compute copy limit */
@@ -211,63 +224,73 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* SSN association set here
*/
can_collapse = (size_goal - skb->len > 0) &&
- mptcp_skb_can_collapse_to(msk, skb, mpext);
+ mptcp_skb_can_collapse_to(*write_seq, skb, mpext);
if (!can_collapse)
TCP_SKB_CB(skb)->eor = 1;
else
avail_size = size_goal - skb->len;
}
- /* reuse tail pfrag, if possible, or carve a new one from the page
- * allocator
- */
- dfrag = mptcp_rtx_tail(sk);
- offset = pfrag->offset;
- dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
- if (!dfrag_collapsed) {
- dfrag = mptcp_carve_data_frag(msk, pfrag, offset);
+ if (!retransmission) {
+ /* reuse tail pfrag, if possible, or carve a new one from the
+ * page allocator
+ */
+ dfrag = mptcp_rtx_tail(sk);
+ offset = pfrag->offset;
+ dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
+ if (!dfrag_collapsed) {
+ dfrag = mptcp_carve_data_frag(msk, pfrag, offset);
+ offset = dfrag->offset;
+ frag_truesize = dfrag->overhead;
+ }
+ poffset = &pfrag->offset;
+ psize = min_t(size_t, pfrag->size - offset, avail_size);
+
+ /* Copy to page */
+ pr_debug("left=%zu", msg_data_left(msg));
+ psize = copy_page_from_iter(pfrag->page, offset,
+ min_t(size_t, msg_data_left(msg),
+ psize),
+ &msg->msg_iter);
+ pr_debug("left=%zu", msg_data_left(msg));
+ if (!psize)
+ return -EINVAL;
+
+ if (!sk_wmem_schedule(sk, psize + dfrag->overhead))
+ return -ENOMEM;
+ } else {
offset = dfrag->offset;
- frag_truesize = dfrag->overhead;
+ poffset = &dfrag->offset;
+ psize = min_t(size_t, dfrag->data_len, avail_size);
}
- psize = min_t(size_t, pfrag->size - offset, avail_size);
-
- /* Copy to page */
- pr_debug("left=%zu", msg_data_left(msg));
- psize = copy_page_from_iter(pfrag->page, offset,
- min_t(size_t, msg_data_left(msg), psize),
- &msg->msg_iter);
- pr_debug("left=%zu", msg_data_left(msg));
- if (!psize)
- return -EINVAL;
-
- if (!sk_wmem_schedule(sk, psize + dfrag->overhead))
- return -ENOMEM;
/* tell the TCP stack to delay the push so that we can safely
* access the skb after the sendpages call
*/
- ret = do_tcp_sendpages(ssk, pfrag->page, offset, psize,
+ ret = do_tcp_sendpages(ssk, page, offset, psize,
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
frag_truesize += ret;
- if (unlikely(ret < psize))
- iov_iter_revert(&msg->msg_iter, psize - ret);
+ if (!retransmission) {
+ if (unlikely(ret < psize))
+ iov_iter_revert(&msg->msg_iter, psize - ret);
- /* send successful, keep track of sent data for mptcp-level
- * retransmission
- */
- dfrag->data_len += ret;
- if (!dfrag_collapsed) {
- get_page(dfrag->page);
- list_add_tail(&dfrag->list, &msk->rtx_queue);
- }
+ /* send successful, keep track of sent data for mptcp-level
+ * retransmission
+ */
+ dfrag->data_len += ret;
+ if (!dfrag_collapsed) {
+ get_page(dfrag->page);
+ list_add_tail(&dfrag->list, &msk->rtx_queue);
+ }
- /* charge data on mptcp rtx queue to the master socket
- * Note: we charge such data both to sk and ssk
- */
- sk->sk_forward_alloc -= frag_truesize;
+ /* charge data on mptcp rtx queue to the master socket
+ * Note: we charge such data both to sk and ssk
+ */
+ sk->sk_forward_alloc -= frag_truesize;
+ }
collapsed = skb == tcp_write_queue_tail(ssk);
if (collapsed) {
@@ -281,7 +304,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (mpext) {
memset(mpext, 0, sizeof(*mpext));
- mpext->data_seq = msk->write_seq;
+ mpext->data_seq = *write_seq;
mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
mpext->data_len = ret;
mpext->checksum = 0xbeef;
@@ -297,8 +320,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
*/
out:
- pfrag->offset += frag_truesize;
- msk->write_seq += ret;
+ *poffset += frag_truesize;
+ *write_seq += ret;
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
@@ -347,7 +370,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
mptcp_clean_una(sk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
while (msg_data_left(msg)) {
- ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
+ ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now,
&size_goal);
if (ret < 0)
break;
--
2.23.0
Powered by blists - more mailing lists