linux-kernel - [PATCH] xfrm: Add pre-encap fragmentation for packet offload

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [thread-next>] [day] [month] [year] [list]

Message-Id: <20241124093531.3783434-1-ilia.lin@kernel.org>
Date: Sun, 24 Nov 2024 11:35:31 +0200
From: Ilia Lin <ilia.lin@...nel.org>
To: steffen.klassert@...unet.com,
	leonro@...dia.com,
	herbert@...dor.apana.org.au,
	davem@...emloft.net,
	dsahern@...nel.org,
	edumazet@...gle.com,
	kuba@...nel.org,
	pabeni@...hat.com,
	horms@...nel.org
Cc: netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] xfrm: Add pre-encap fragmentation for packet offload

In packet offload mode the raw packets are sent to the NIC and do
not return to the network stack. In the event that a packet exceeds
the MTU after encapsulation, the NIC hardware may not be able to
fragment the final packet.
Add mandatory pre-encapsulation fragmentation for both IPv4 and
IPv6 when tunnel mode with packet offload is configured on the
state.

Signed-off-by: Ilia Lin <ilia.lin@...nel.org>
---
 net/ipv4/xfrm4_output.c | 31 +++++++++++++++++++++++++++++--
 net/ipv6/xfrm6_output.c |  8 ++++++--
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb0..a4271e0dd51bb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,17 +14,44 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
+static int __xfrm4_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	return xfrm_output(sk, skb);
+}
+
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-#ifdef CONFIG_NETFILTER
-	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+	unsigned int mtu;
+	bool toobig;
 
+#ifdef CONFIG_NETFILTER
 	if (!x) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(net, sk, skb);
 	}
 #endif
 
+	if (x->props.mode != XFRM_MODE_TUNNEL || x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+		goto skip_frag;
+
+	mtu = xfrm_state_mtu(x, dst_mtu(skb_dst(skb)));
+
+	toobig = skb->len > mtu && !skb_is_gso(skb);
+
+	if (!skb->ignore_df && toobig && skb->sk) {
+		xfrm_local_error(skb, mtu);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	if (toobig) {
+		IPCB(skb)->frag_max_size = mtu;
+		return ip_do_fragment(net, sk, skb, __xfrm4_output_finish);
+	}
+
+skip_frag:
 	return xfrm_output(sk, skb);
 }
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe62..fdd2f2f5adc71 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -75,10 +75,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (x->props.mode != XFRM_MODE_TUNNEL)
 		goto skip_frag;
 
-	if (skb->protocol == htons(ETH_P_IPV6))
+	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+		mtu = xfrm_state_mtu(x, dst_mtu(skb_dst(skb)));
+		IP6CB(skb)->frag_max_size = mtu;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 		mtu = ip6_skb_dst_mtu(skb);
-	else
+	} else {
 		mtu = dst_mtu(skb_dst(skb));
+	}
 
 	toobig = skb->len > mtu && !skb_is_gso(skb);
 
-- 
2.25.1