[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CANn89iJe=CjG8Lt+XWDOtgd7MdHzCszSd-j-jn0KXtEzWeXJjw@mail.gmail.com>
Date: Mon, 9 Jun 2025 09:26:26 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: Jeremy Harris <jgh@...m.org>
Cc: netdev@...r.kernel.org, linux-api@...r.kernel.org, ncardwell@...gle.com
Subject: Re: [PATCH net-next v3 2/6] tcp: copy write-data from listen socket
to accept child socket
On Mon, Jun 9, 2025 at 9:05 AM Jeremy Harris <jgh@...m.org> wrote:
>
> Set the request_sock flag for fastopen earlier, making it available
> to the af_ops SYN-handler function.
>
> In that function copy data from the listen socket write queue into an
> sk_buff, allocating if needed and adding to the write queue of the
> newly-created child socket.
> Set sequence number values depending on the fastopen status.
>
> Signed-off-by: Jeremy Harris <jgh@...m.org>
> ---
> net/ipv4/tcp_fastopen.c | 3 ++-
> net/ipv4/tcp_ipv4.c | 4 +--
> net/ipv4/tcp_minisocks.c | 58 ++++++++++++++++++++++++++++++++++++----
> 3 files changed, 57 insertions(+), 8 deletions(-)
>
> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> index 9b83d639b5ac..03a86d0b87ba 100644
> --- a/net/ipv4/tcp_fastopen.c
> +++ b/net/ipv4/tcp_fastopen.c
> @@ -245,6 +245,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
> struct sock *child;
> bool own_req;
>
> + tcp_rsk(req)->tfo_listener = true;
> +
> child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
> NULL, &own_req);
> if (!child)
> @@ -261,7 +263,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
> tp = tcp_sk(child);
>
> rcu_assign_pointer(tp->fastopen_rsk, req);
> - tcp_rsk(req)->tfo_listener = true;
>
> /* RFC1323: The window in SYN & SYN/ACK segments is never
> * scaled. So correct it appropriately.
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 6a14f9e6fef6..e488effdbdb2 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1747,8 +1747,8 @@ EXPORT_IPV6_MOD(tcp_v4_conn_request);
>
>
> /*
> - * The three way handshake has completed - we got a valid synack -
> - * now create the new socket.
> + * The three way handshake has completed - we got a valid synack
> + * (or a FASTOPEN syn) - now create the new socket.
> */
> struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
> struct request_sock *req,
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 43d7852ce07e..d471531b4a78 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -529,7 +529,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
> struct inet_connection_sock *newicsk;
> const struct tcp_sock *oldtp;
> struct tcp_sock *newtp;
> - u32 seq;
> + u32 seq, a_seq, n_seq;
>
> if (!newsk)
> return NULL;
> @@ -550,9 +550,55 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
> newtp->segs_in = 1;
>
> seq = treq->snt_isn + 1;
> - newtp->snd_sml = newtp->snd_una = seq;
> - WRITE_ONCE(newtp->snd_nxt, seq);
> - newtp->snd_up = seq;
> + n_seq = seq;
> + a_seq = seq;
> + newtp->write_seq = seq;
> + newtp->snd_una = seq;
> +
> + /* If there is write-data sitting on the listen socket, copy it to
> + * the accept socket. If FASTOPEN we will send it on the synack,
> + * otherwise it sits there until 3rd-ack arrives.
> + */
> +
> + if (unlikely(!skb_queue_empty(&sk->sk_write_queue))) {
> + struct sk_buff *l_skb = tcp_send_head(sk),
> + *a_skb = tcp_write_queue_tail(newsk);
> + ssize_t copy = 0;
> +
> + if (a_skb)
> + copy = l_skb->len - a_skb->len;
> +
> + if (copy <= 0 || !tcp_skb_can_collapse_to(a_skb)) {
> + bool first_skb = tcp_rtx_and_write_queues_empty(newsk);
> +
> + a_skb = tcp_stream_alloc_skb(newsk,
> + newsk->sk_allocation,
> + first_skb);
> + if (!a_skb) {
> + /* is this the correct free? */
> + bh_unlock_sock(newsk);
> + sk_free(newsk);
> + return NULL;
> + }
> +
> + tcp_skb_entail(newsk, a_skb);
> + }
> + copy = min_t(int, l_skb->len, skb_tailroom(a_skb));
> + skb_put_data(a_skb, l_skb->data, copy);
TCP stack no longer puts payload in skb->head. This caused many
issues, trust me.
Please do not try to bring this back.
Also I see no locking there, this is racy with another thread trying
to sendmsg() data to the listener.
Honestly, I do not like this series at all adding cost in TCP fast
path for a very narrow use-case.
Powered by blists - more mailing lists