[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAB9dFdteL97Z8GAry9TwmcOfw0+PQDzL_u14PwwAEq5uUHaUkQ@mail.gmail.com>
Date: Tue, 9 Jan 2024 08:59:48 -0400
From: Marc Dionne <marc.dionne@...istor.com>
To: David Howells <dhowells@...hat.com>
Cc: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, linux-afs@...ts.infradead.org,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net] rxrpc: Fix use of Don't Fragment flag
On Tue, Jan 9, 2024 at 6:51 AM David Howells <dhowells@...hat.com> wrote:
>
> rxrpc normally has the Don't Fragment flag set on the UDP packets it
> transmits, except when it has decided that DATA packets aren't getting
> through - in which case it turns it off just for the DATA transmissions.
> This can be a problem, however, for RESPONSE packets that convey
> authentication and crypto data from the client to the server as ticket may
> be larger than can fit in the MTU.
>
> In such a case, rxrpc gets itself into an infinite loop as the sendmsg
> returns an error (EMSGSIZE), which causes rxkad_send_response() to return
> -EAGAIN - and the CHALLENGE packet is put back on the Rx queue to retry,
> leading to the I/O thread endlessly attempting to perform the transmission.
>
> Fix this by disabling DF on RESPONSE packets for now. The use of DF and
> best data MTU determination needs reconsidering at some point in the
> future.
>
> Reported-by: Marc Dionne <marc.dionne@...istor.com>
> Signed-off-by: David Howells <dhowells@...hat.com>
> cc: "David S. Miller" <davem@...emloft.net>
> cc: Eric Dumazet <edumazet@...gle.com>
> cc: Jakub Kicinski <kuba@...nel.org>
> cc: Paolo Abeni <pabeni@...hat.com>
> cc: linux-afs@...ts.infradead.org
> cc: netdev@...r.kernel.org
> ---
> net/rxrpc/ar-internal.h | 1 +
> net/rxrpc/local_object.c | 13 ++++++++++++-
> net/rxrpc/output.c | 6 ++----
> net/rxrpc/rxkad.c | 2 ++
> 4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
> index e8e14c6f904d..e8b43408136a 100644
> --- a/net/rxrpc/ar-internal.h
> +++ b/net/rxrpc/ar-internal.h
> @@ -1076,6 +1076,7 @@ void rxrpc_send_version_request(struct rxrpc_local *local,
> /*
> * local_object.c
> */
> +void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set);
> struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
> struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
> struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
> diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
> index c553a30e9c83..7a3150482e37 100644
> --- a/net/rxrpc/local_object.c
> +++ b/net/rxrpc/local_object.c
> @@ -36,6 +36,17 @@ static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
> return ipv6_icmp_error(sk, skb, err, port, info, payload);
> }
>
> +/*
> + * Set or clear the Don't Fragment flag on a socket.
> + */
> +void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set)
> +{
> + if (set)
> + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT);
> + else
> + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
> +}
Shouldn't those be reversed - don't fragment should be IP_PMTUDISC_DO?
Also, and this is probably already an issue with current code, I don't
think this is effective if the socket is V6, which it will be in most
cases.
> +
> /*
> * Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
> * same or greater than.
> @@ -203,7 +214,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
> ip_sock_set_recverr(usk);
>
> /* we want to set the don't fragment bit */
> - ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO);
> + rxrpc_local_dont_fragment(local, true);
>
> /* We want receive timestamps. */
> sock_enable_timestamps(usk);
> diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
> index 5e53429c6922..a0906145e829 100644
> --- a/net/rxrpc/output.c
> +++ b/net/rxrpc/output.c
> @@ -494,14 +494,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
> switch (conn->local->srx.transport.family) {
> case AF_INET6:
> case AF_INET:
> - ip_sock_set_mtu_discover(conn->local->socket->sk,
> - IP_PMTUDISC_DONT);
> + rxrpc_local_dont_fragment(conn->local, false);
> rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
> ret = do_udp_sendmsg(conn->local->socket, &msg, len);
> conn->peer->last_tx_at = ktime_get_seconds();
>
> - ip_sock_set_mtu_discover(conn->local->socket->sk,
> - IP_PMTUDISC_DO);
> + rxrpc_local_dont_fragment(conn->local, true);
> break;
>
> default:
> diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
> index 1bf571a66e02..b52dedcebce0 100644
> --- a/net/rxrpc/rxkad.c
> +++ b/net/rxrpc/rxkad.c
> @@ -724,7 +724,9 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
> serial = atomic_inc_return(&conn->serial);
> whdr.serial = htonl(serial);
>
> + rxrpc_local_dont_fragment(conn->local, false);
> ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
> + rxrpc_local_dont_fragment(conn->local, true);
> if (ret < 0) {
> trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
> rxrpc_tx_point_rxkad_response);
Marc
Powered by blists - more mailing lists