lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20201009052610.20945-1-lina.wang@mediatek.com>
Date:   Fri, 9 Oct 2020 13:26:10 +0800
From:   Lina Wang <lina.wang@...iatek.com>
To:     "David S . Miller" <davem@...emloft.net>,
        Alexey Kuznetsov <kuznet@....inr.ac.ru>,
        Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
        Jakub Kicinski <kuba@...nel.org>,
        Steffen Klassert <steffen.klassert@...unet.com>,
        Herbert Xu <herbert@...dor.apana.org.au>,
        Matthias Brugger <matthias.bgg@...il.com>
CC:     <netdev@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <linux-arm-kernel@...ts.infradead.org>,
        <linux-mediatek@...ts.infradead.org>, <kernel-team@...roid.com>,
        Lina Wang <lina.wang@...iatek.com>
Subject: xfrm:fragmented ipv4 tunnel packets in inner interface

In esp's tunnel mode,if inner interface is ipv4,outer is ipv4,one big 
packet which travels through tunnel will be fragmented with outer 
interface's mtu,peer server will remove tunnelled esp header and assemble
them in big packet.After forwarding such packet to next endpoint,it will 
be dropped because of exceeding mtu or be returned ICMP(packet-too-big).
When inner interface is ipv4,outer is ipv6,the flag of xfrm state in tunnel
mode is af-unspec, thing is different.One big packet through tunnel will be
fragmented with outer interface's mtu minus tunneled header, then two or 
more less fragmented packets will be tunneled and transmitted in outer 
interface,that is what xfrm6_output has done. If peer server receives such
packets, it will forward successfully to next because length is valid.

This patch has followed up xfrm6_output's logic,which includes two changes,
one is choosing suitable mtu value which considering innner/outer 
interface's mtu and dst path, the other is if packet is too big, calling 
ip_fragment first,then tunnelling fragmented packets in outer interface and
transmitting finally.

Signed-off-by: Lina Wang <lina.wang@...iatek.com>
---
 include/net/ip.h        |  3 +++
 net/ipv4/ip_output.c    | 10 +++-------
 net/ipv4/xfrm4_output.c | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index b09c48d862cc..05f9c6454ff5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -163,6 +163,9 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		   int (*output)(struct net *, struct sock *, struct sk_buff *));
+int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		unsigned int mtu,
+		int (*output)(struct net *, struct sock *, struct sk_buff *));
 
 struct ip_fraglist_iter {
 	struct sk_buff	*frag;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 61f802d5350c..f99249132a76 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -82,10 +82,6 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-static int
-ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-	    unsigned int mtu,
-	    int (*output)(struct net *, struct sock *, struct sk_buff *));
 
 /* Generate a checksum for an outgoing IP datagram. */
 void ip_send_check(struct iphdr *iph)
@@ -569,9 +565,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
-static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-		       unsigned int mtu,
-		       int (*output)(struct net *, struct sock *, struct sk_buff *))
+int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		unsigned int mtu,
+		int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph = ip_hdr(skb);
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb..1488b79186ad 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,8 +14,27 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
+static int __xfrm4_output_finish(struct net *net, struct sock *sk,
+				 struct sk_buff *skb)
+{
+	return xfrm_output(sk, skb);
+}
+
+static inline int ip4_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct inet_sock *np = skb->sk && !dev_recursion_level() ?
+				inet_sk(skb->sk) : NULL;
+
+	return (np & np->pmtudisc >= IP_PMTUDISC_PROBE) ?
+		skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+}
+
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	int mtu;
+	bool toobig;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+
 #ifdef CONFIG_NETFILTER
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
@@ -25,6 +44,24 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
+	if (x->props.mode != XFRM_MODE_TUNNEL)
+		goto skip_frag;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		mtu = ip4_skb_dst_mtu(skb);
+	else
+		goto skip_frag;
+
+	toobig = skb->len > mtu && !skb_is_gso(skb);
+	if (!skb->ignore_df && toobig && skb->sk) {
+		xfrm_local_error(skb, mtu);
+		return -EMSGSIZE;
+	}
+
+	if (toobig || dst_allfrag(skb_dst(skb)))
+		return ip_fragment(net, sk, skb, mtu, __xfrm4_output_finish);
+
+skip_frag:
 	return xfrm_output(sk, skb);
 }
 
-- 
2.18.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ