Message-ID: <CAK6E8=dUvde2s7VuUS9rHiA2fU0ZT2bS=ronjM+9BOA=0u-rVw@mail.gmail.com>
Date: Wed, 19 Nov 2014 14:10:42 +0800
From: Yuchung Cheng <ycheng@...gle.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: Denys Fedoryshchenko <nuclearcat@...learcat.com>,
David Miller <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Neal Cardwell <ncardwell@...gle.com>
Subject: Re: [PATCH v2 net-next] tcp: make connect() mem charging friendly
On Tue, Nov 18, 2014 at 3:06 PM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> From: Eric Dumazet <edumazet@...gle.com>
>
> While working on the sk_forward_alloc problems reported by Denys
> Fedoryshchenko, we found that tcp connect() (and fastopen) do not call
> sk_wmem_schedule() for the SYN packet (and/or the SYN+DATA packet), so
> sk_forward_alloc goes negative while connect is in progress.
>
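For anyone following along: the write-side charging contract is that
every skb queued on the socket must first be covered by
sk_forward_alloc, which sk_wmem_schedule() tops up from the protocol
memory pool; tcp_connect_queue_skb() then charges skb->truesize
against it. A minimal sketch of what sk_stream_alloc_skb() buys us
here (paraphrased from memory and simplified, not the exact kernel
source; the helper name alloc_charged_skb() is made up for
illustration):

	/* Hypothetical simplified version of sk_stream_alloc_skb():
	 * allocate an skb that is properly charged to the socket.
	 */
	static struct sk_buff *alloc_charged_skb(struct sock *sk, int size,
						 gfp_t gfp)
	{
		struct sk_buff *skb;

		skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
		if (!skb)
			return NULL;

		/* Reserve forward allocation *before* the skb is queued,
		 * so sk_forward_alloc never goes negative.
		 */
		if (!sk_wmem_schedule(sk, skb->truesize)) {
			__kfree_skb(skb);
			return NULL;
		}
		skb_reserve(skb, MAX_TCP_HEADER);
		return skb;
	}

The old code called alloc_skb_fclone()/skb_copy_expand() directly and
skipped the sk_wmem_schedule() step, which is exactly why the later
charge drove sk_forward_alloc negative.
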
> We can fix this by calling the regular sk_stream_alloc_skb() both for
> the SYN packet (in tcp_connect()) and for the syn_data packet (in
> tcp_send_syn_data()).
>
> Then, tcp_send_syn_data() can avoid copying syn_data, as we can simply
> manipulate syn_data->cb[] to remove the SYN flag (and increment the
> sequence number, since the SYN consumes one unit of sequence space).
>
> Instead of open-coding memcpy_fromiovecend(), simply use this helper.
>
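(For readers unfamiliar with the helper: memcpy_fromiovecend() walks
the iovec array internally, so the per-segment loop disappears. To
the best of my recollection of its semantics, the single call in the
patch behaves like this:

	/* Copy 'space' bytes of user data, starting at byte offset 0
	 * of the iovec array, into the tail of syn_data. Returns 0 on
	 * success or -EFAULT, and does not advance the iovec itself.
	 */
	err = memcpy_fromiovecend(skb_put(syn_data, space),
				  fo->data->msg_iov, 0, space);

which replaces the whole skb_add_data() loop removed below.)
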
> This leaves clean fast-clone skbs in the socket write queue.
>
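(On the "clean fast clone" point: sk_stream_alloc_skb() allocates with
alloc_skb_fclone(), which places a companion skb in the same
allocation, so the first skb_clone() reuses that companion instead of
hitting the slab allocator. A rough sketch, simplified from memory:

	/* fclone allocation: parent + companion in one slab object */
	struct sk_buff *skb = alloc_skb_fclone(size, GFP_KERNEL);

	/* tcp_transmit_skb(sk, skb, 1, gfp) clones before sending;
	 * for an fclone parent this is cheap, it just takes the
	 * companion. The parent stays in the write queue for a
	 * possible retransmit.
	 */
	struct sk_buff *clone = skb_clone(skb, GFP_KERNEL);

Previously the write queue held skbs produced by skb_copy_expand() and
pskb_copy(), which are not fclone-backed, so each (re)transmit paid
for a full clone allocation.)
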
> This was tested against our fastopen packetdrill tests.
>
> Reported-by: Denys Fedoryshchenko <nuclearcat@...learcat.com>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Acked-by: Yuchung Cheng <ycheng@...gle.com>
Thanks! This simplifies the code a lot.
> ---
> v2: added a kfree_skb(syn_data) if memcpy_fromiovecend() fails,
> as spotted by Yuchung.
>
> net/ipv4/tcp_output.c | 68 ++++++++++++++++------------------------
> 1 file changed, 28 insertions(+), 40 deletions(-)
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index eb73a1dccf56b823a45c0ca034e40dc50fc48068..f5bd4bd3f7e669b3fd48a843d55e7313a30a3409 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -3011,9 +3011,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> struct tcp_fastopen_request *fo = tp->fastopen_req;
> - int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
> - struct sk_buff *syn_data = NULL, *data;
> + int syn_loss = 0, space, err = 0;
> unsigned long last_syn_loss = 0;
> + struct sk_buff *syn_data;
>
> tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
> tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
> @@ -3044,48 +3044,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
> /* limit to order-0 allocations */
> space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
>
> - syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
> - sk->sk_allocation);
> - if (syn_data == NULL)
> + syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
> + if (!syn_data)
> goto fallback;
> + syn_data->ip_summed = CHECKSUM_PARTIAL;
> + memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
> + if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
> + fo->data->msg_iov, 0, space))) {
> + kfree_skb(syn_data);
> + goto fallback;
> + }
>
> - for (i = 0; i < iovlen && syn_data->len < space; ++i) {
> - struct iovec *iov = &fo->data->msg_iov[i];
> - unsigned char __user *from = iov->iov_base;
> - int len = iov->iov_len;
> + /* No more data pending in inet_wait_for_connect() */
> + if (space == fo->size)
> + fo->data = NULL;
> + fo->copied = space;
>
> - if (syn_data->len + len > space)
> - len = space - syn_data->len;
> - else if (i + 1 == iovlen)
> - /* No more data pending in inet_wait_for_connect() */
> - fo->data = NULL;
> + tcp_connect_queue_skb(sk, syn_data);
>
> - if (skb_add_data(syn_data, from, len))
> - goto fallback;
> - }
> + err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
>
> - /* Queue a data-only packet after the regular SYN for retransmission */
> - data = pskb_copy(syn_data, sk->sk_allocation);
> - if (data == NULL)
> - goto fallback;
> - TCP_SKB_CB(data)->seq++;
> - TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
> - TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
> - tcp_connect_queue_skb(sk, data);
> - fo->copied = data->len;
> -
> - /* syn_data is about to be sent, we need to take current time stamps
> - * for the packets that are in write queue : SYN packet and DATA
> - */
> - skb_mstamp_get(&syn->skb_mstamp);
> - data->skb_mstamp = syn->skb_mstamp;
> + syn->skb_mstamp = syn_data->skb_mstamp;
>
> - if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
> + /* Now full SYN+DATA was cloned and sent (or not),
> + * remove the SYN from the original skb (syn_data)
> + * we keep in write queue in case of a retransmit, as we
> + * also have the SYN packet (with no data) in the same queue.
> + */
> + TCP_SKB_CB(syn_data)->seq++;
> + TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
> + if (!err) {
> tp->syn_data = (fo->copied > 0);
> NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
> goto done;
> }
> - syn_data = NULL;
>
> fallback:
> /* Send a regular SYN with Fast Open cookie request option */
> @@ -3094,7 +3086,6 @@ fallback:
> err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
> if (err)
> tp->syn_fastopen = 0;
> - kfree_skb(syn_data);
> done:
> fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
> return err;
> @@ -3114,13 +3105,10 @@ int tcp_connect(struct sock *sk)
> return 0;
> }
>
> - buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
> - if (unlikely(buff == NULL))
> + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
> + if (unlikely(!buff))
> return -ENOBUFS;
>
> - /* Reserve space for headers. */
> - skb_reserve(buff, MAX_TCP_HEADER);
> -
> tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
> tp->retrans_stamp = tcp_time_stamp;
> tcp_connect_queue_skb(sk, buff);
>
>