[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241124093531.3783434-1-ilia.lin@kernel.org>
Date: Sun, 24 Nov 2024 11:35:31 +0200
From: Ilia Lin <ilia.lin@...nel.org>
To: steffen.klassert@...unet.com,
leonro@...dia.com,
herbert@...dor.apana.org.au,
davem@...emloft.net,
dsahern@...nel.org,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
horms@...nel.org
Cc: netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH] xfrm: Add pre-encap fragmentation for packet offload
In packet offload mode, raw packets are sent to the NIC and do not
return to the network stack. In the event that a packet exceeds the
MTU size after encapsulation, the NIC hardware may not be able to
fragment the final packet.
Add mandatory pre-encapsulation fragmentation for both IPv4 and
IPv6 when tunnel mode with packet offload is configured on the
state.
Signed-off-by: Ilia Lin <ilia.lin@...nel.org>
---
net/ipv4/xfrm4_output.c | 31 +++++++++++++++++++++++++++++--
net/ipv6/xfrm6_output.c | 8 ++++++--
2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb0..a4271e0dd51bb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,17 +14,44 @@
#include <net/xfrm.h>
#include <net/icmp.h>
+static int __xfrm4_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ return xfrm_output(sk, skb);
+}
+
static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
-#ifdef CONFIG_NETFILTER
- struct xfrm_state *x = skb_dst(skb)->xfrm;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+ unsigned int mtu;
+ bool toobig;
+#ifdef CONFIG_NETFILTER
if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(net, sk, skb);
}
#endif
+ if (x->props.mode != XFRM_MODE_TUNNEL || x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ goto skip_frag;
+
+ mtu = xfrm_state_mtu(x, dst_mtu(skb_dst(skb)));
+
+ toobig = skb->len > mtu && !skb_is_gso(skb);
+
+ if (!skb->ignore_df && toobig && skb->sk) {
+ xfrm_local_error(skb, mtu);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ if (toobig) {
+ IPCB(skb)->frag_max_size = mtu;
+ return ip_do_fragment(net, sk, skb, __xfrm4_output_finish);
+ }
+
+skip_frag:
return xfrm_output(sk, skb);
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe62..fdd2f2f5adc71 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -75,10 +75,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (x->props.mode != XFRM_MODE_TUNNEL)
goto skip_frag;
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ mtu = xfrm_state_mtu(x, dst_mtu(skb_dst(skb)));
+ IP6CB(skb)->frag_max_size = mtu;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
mtu = ip6_skb_dst_mtu(skb);
- else
+ } else {
mtu = dst_mtu(skb_dst(skb));
+ }
toobig = skb->len > mtu && !skb_is_gso(skb);
--
2.25.1
Powered by blists - more mailing lists