[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241204074710.990092-16-dhowells@redhat.com>
Date: Wed, 4 Dec 2024 07:46:43 +0000
From: David Howells <dhowells@...hat.com>
To: netdev@...r.kernel.org
Cc: David Howells <dhowells@...hat.com>,
Marc Dionne <marc.dionne@...istor.com>,
Yunsheng Lin <linyunsheng@...wei.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
linux-afs@...ts.infradead.org,
linux-kernel@...r.kernel.org
Subject: [PATCH net-next v2 15/39] rxrpc: Only set DF=1 on initial DATA transmission
Change how the DF flag is managed on DATA transmissions. Set it on initial
transmission and don't set it on retransmissions. Then remove the handling
for EMSGSIZE in rxrpc_send_data_packet() and just pretend it didn't happen,
leaving it to the retransmission path to retry.
The path-MTU discovery using PING ACKs is then used to probe for the
maximum DATA size - though notification by ICMP will be used if one is
received.
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Marc Dionne <marc.dionne@...istor.com>
cc: "David S. Miller" <davem@...emloft.net>
cc: Eric Dumazet <edumazet@...gle.com>
cc: Jakub Kicinski <kuba@...nel.org>
cc: Paolo Abeni <pabeni@...hat.com>
cc: linux-afs@...ts.infradead.org
cc: netdev@...r.kernel.org
---
net/rxrpc/ar-internal.h | 1 +
net/rxrpc/output.c | 32 ++++++++++++++++----------------
net/rxrpc/proc.c | 5 +++--
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 55cc68dd1b40..84efa21f176c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -98,6 +98,7 @@ struct rxrpc_net {
atomic_t stat_tx_data_send;
atomic_t stat_tx_data_send_frag;
atomic_t stat_tx_data_send_fail;
+ atomic_t stat_tx_data_send_msgsize;
atomic_t stat_tx_data_underflow;
atomic_t stat_tx_data_cwnd_reset;
atomic_t stat_rx_data;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index ca0da5e5d278..3d992023f80f 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -552,16 +552,11 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
msg.msg_controllen = 0;
msg.msg_flags = MSG_SPLICE_PAGES;
- /* Track what we've attempted to transmit at least once so that the
- * retransmission algorithm doesn't try to resend what we haven't sent
- * yet.
+ /* Send the packet with the don't fragment bit set unless we think it's
+ * too big or if this is a retransmission.
*/
- if (txb->seq == call->tx_transmitted + 1)
- call->tx_transmitted = txb->seq + n - 1;
-
- /* send the packet with the don't fragment bit set if we currently
- * think it's small enough */
- if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
+ if (txb->seq == call->tx_transmitted + 1 &&
+ len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
rxrpc_local_dont_fragment(conn->local, false);
frag = rxrpc_tx_point_call_data_frag;
} else {
@@ -569,6 +564,13 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
frag = rxrpc_tx_point_call_data_nofrag;
}
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet.
+ */
+ if (txb->seq == call->tx_transmitted + 1)
+ call->tx_transmitted = txb->seq + n - 1;
+
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
@@ -580,7 +582,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
}
}
-retry:
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
@@ -591,7 +592,11 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0) {
+ if (ret == -EMSGSIZE) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
+ trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag);
+ ret = 0;
+ } else if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
} else {
@@ -599,11 +604,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
}
rxrpc_tx_backoff(call, ret);
- if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_nofrag) {
- rxrpc_local_dont_fragment(conn->local, false);
- frag = rxrpc_tx_point_call_data_frag;
- goto retry;
- }
done:
if (ret >= 0) {
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 44722c226064..249e1ed9c5c9 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -473,10 +473,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
seq_printf(seq,
- "Data : send=%u sendf=%u fail=%u\n",
+ "Data : send=%u sendf=%u fail=%u emsz=%u\n",
atomic_read(&rxnet->stat_tx_data_send),
atomic_read(&rxnet->stat_tx_data_send_frag),
- atomic_read(&rxnet->stat_tx_data_send_fail));
+ atomic_read(&rxnet->stat_tx_data_send_fail),
+ atomic_read(&rxnet->stat_tx_data_send_msgsize));
seq_printf(seq,
"Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n",
atomic_read(&rxnet->stat_tx_data),
Powered by blists - more mailing lists