lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1383422330.4291.58.camel@edumazet-glaptop2.roam.corp.google.com>
Date:	Sat, 02 Nov 2013 12:58:50 -0700
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Ben Hutchings <bhutchings@...arflare.com>
Cc:	David Miller <davem@...emloft.net>, christoph.paasch@...ouvain.be,
	herbert@...dor.apana.org.au, netdev@...r.kernel.org,
	hkchu@...gle.com, mwdalton@...gle.com
Subject: Re: [PATCH v4 net-next] net: introduce dev_set_forwarding()

From: Eric Dumazet <edumazet@...gle.com>

Christoph Paasch and Jerry Chu reported crashes in skb_segment() caused
by commit 8a29111c7ca6 ("net: gro: allow to build full sized skb")

skb_segment() only deals with a frag_list chain containing MSS sized
fragments. Even if we fix this problem, it's better if the GRO layer
doesn't build skbs with a frag_list in the first place, to let TSO
packets reach output devices.
 
David Miller and Ben Hutchings suggested we keep track of the number of
forwarding users to be able to:

- Disable LRO
- Make sure the GRO layer does not use skb frag_list to extend skb capacity

Note that after this patch, LRO is automatically re-enabled if
forwarding is disabled on the device, or if a device is removed
from a bridge.

Tested:

lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: on
lpq84:~# echo 1 >/proc/sys/net/ipv4/conf/eth0/forwarding
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: off [requested on]
lpq84:~# echo 0 >/proc/sys/net/ipv4/conf/eth0/forwarding
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: on


lpq84:~# ethtool -K eth0 lro off
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: off
lpq84:~# echo 1 >/proc/sys/net/ipv4/conf/eth0/forwarding
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: off
lpq84:~# echo 0 >/proc/sys/net/ipv4/conf/eth0/forwarding
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: off
lpq84:~# ethtool -K eth0 lro on 


lpq84:~# cat /proc/sys/net/ipv4/ip_forward
0
lpq84:~# echo 1 >/proc/sys/net/ipv4/ip_forward
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: off [requested on]
lpq84:~# echo 0 >/proc/sys/net/ipv4/ip_forward
lpq84:~# ethtool -k eth0 | grep "large-receive"
large-receive-offload: on


Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Reported-by: Christoph Paasch <christoph.paasch@...ouvain.be>
Reported-by: Jerry Chu <hkchu@...gle.com>
Cc: Michael Dalton <mwdalton@...gle.com>
Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb")
---
v4: drop LRO in netdev_fix_features(), as Ben pointed out.

 include/linux/netdevice.h |    3 ++-
 net/bridge/br_if.c        |    4 +++-
 net/core/dev.c            |   31 ++++++++++++++++++++-----------
 net/core/skbuff.c         |   11 ++++++++---
 net/ipv4/devinet.c        |   14 ++++++++------
 net/ipv6/addrconf.c       |    5 ++---
 net/ipv6/addrconf_core.c  |    2 ++
 7 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb1d918..6ddd0fa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1295,6 +1295,7 @@ struct net_device {
 
 	struct netdev_queue __rcu *ingress_queue;
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
+	unsigned int		forwarding_count;
 
 
 /*
@@ -1787,7 +1788,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name);
 int dev_alloc_name(struct net_device *dev, const char *name);
 int dev_open(struct net_device *dev);
 int dev_close(struct net_device *dev);
-void dev_disable_lro(struct net_device *dev);
+void dev_set_forwarding(struct net_device *dev, int inc);
 int dev_loopback_xmit(struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
 int register_netdevice(struct net_device *dev);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c41d5fb..9591643 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -150,6 +150,8 @@ static void del_nbp(struct net_bridge_port *p)
 
 	netdev_rx_handler_unregister(dev);
 
+	dev_set_forwarding(dev, -1);
+
 	netdev_upper_dev_unlink(dev, br->dev);
 
 	br_multicast_del_port(p);
@@ -377,7 +379,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
-	dev_disable_lro(dev);
+	dev_set_forwarding(dev, 1);
 
 	list_add_rcu(&p->list, &br->port_list);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 0054c8c..f95bde6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1408,29 +1408,33 @@ EXPORT_SYMBOL(dev_close);
 
 
 /**
- *	dev_disable_lro - disable Large Receive Offload on a device
- *	@dev: device
- *
- *	Disable Large Receive Offload (LRO) on a net device.  Must be
- *	called under RTNL.  This is needed if received packets may be
- *	forwarded to another interface.
+ * dev_set_forwarding - Keep count of forwarding users for a device
+ * @dev: device
+ * @inc: +1 or -1
+ *
+ * Add or remove forwarding from a device.
+ * When the count of forwarding users is above 0 :
+ * 1) disable LRO (Large Receive Offload) on this device.
+ * 2) instruct GRO layer to not use frag_list to extend skb capacity.
+ * Must be called under RTNL.
+ * This is needed if received packets may be forwarded to another interface.
  */
-void dev_disable_lro(struct net_device *dev)
+void dev_set_forwarding(struct net_device *dev, int inc)
 {
 	/*
-	 * If we're trying to disable lro on a vlan device
+	 * If we're trying to enable forwarding from a vlan device
 	 * use the underlying physical device instead
 	 */
 	if (is_vlan_dev(dev))
 		dev = vlan_dev_real_dev(dev);
 
-	dev->wanted_features &= ~NETIF_F_LRO;
+	dev->forwarding_count += inc;
 	netdev_update_features(dev);
 
-	if (unlikely(dev->features & NETIF_F_LRO))
+	if (unlikely(dev->forwarding_count && (dev->features & NETIF_F_LRO)))
 		netdev_WARN(dev, "failed to disable LRO!\n");
 }
-EXPORT_SYMBOL(dev_disable_lro);
+EXPORT_SYMBOL(dev_set_forwarding);
 
 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
 				   struct net_device *dev)
@@ -5584,6 +5588,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}
 
+	if ((features & NETIF_F_LRO) && dev->forwarding_count) {
+		netdev_dbg(dev, "Dropping LRO because of forwarding.\n");
+		features &= ~NETIF_F_LRO;
+	}
+
 	return features;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0ab32fa..7b1cff8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2944,9 +2944,11 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		int i = skbinfo->nr_frags;
 		int nr_frags = pinfo->nr_frags + i;
 
-		if (nr_frags > MAX_SKB_FRAGS)
+		if (nr_frags > MAX_SKB_FRAGS) {
+			if (skb->dev->forwarding_count)
+				return -E2BIG;
 			goto merge;
-
+		}
 		offset -= headlen;
 		pinfo->nr_frags = nr_frags;
 		skbinfo->nr_frags = 0;
@@ -2977,8 +2979,11 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		unsigned int first_size = headlen - offset;
 		unsigned int first_offset;
 
-		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
+		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) {
+			if (skb->dev->forwarding_count)
+				return -E2BIG;
 			goto merge;
+		}
 
 		first_offset = skb->data -
 			       (unsigned char *)page_address(page) +
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a1b5bcb..cf01e6f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -223,6 +223,8 @@ void in_dev_finish_destroy(struct in_device *idev)
 #ifdef NET_REFCNT_DEBUG
 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 #endif
+	if (IPV4_DEVCONF(idev->cnf, FORWARDING))
+		dev_set_forwarding(dev, -1);
 	dev_put(dev);
 	if (!idev->dead)
 		pr_err("Freeing alive in_device %p\n", idev);
@@ -248,7 +250,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	if (!in_dev->arp_parms)
 		goto out_kfree;
 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
-		dev_disable_lro(dev);
+		dev_set_forwarding(dev, 1);
 	/* Reference in_dev->dev */
 	dev_hold(dev);
 	/* Account for reference dev->ip_ptr (below) */
@@ -1932,8 +1934,8 @@ static void inet_forward_change(struct net *net)
 
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
-		if (on)
-			dev_disable_lro(dev);
+
+		dev_set_forwarding(dev, on ? 1 : -1);
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev) {
@@ -1997,7 +1999,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
 	loff_t pos = *ppos;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
-	if (write && *valp != val) {
+	if (write && (!(*valp) != !val)) {
 		struct net *net = ctl->extra2;
 
 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
@@ -2013,8 +2015,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
 				struct ipv4_devconf *cnf = ctl->extra1;
 				struct in_device *idev =
 					container_of(cnf, struct in_device, cnf);
-				if (*valp)
-					dev_disable_lro(idev->dev);
+
+				dev_set_forwarding(idev->dev, *valp ? 1 : -1);
 				inet_netconf_notify_devconf(net,
 							    NETCONFA_FORWARDING,
 							    idev->dev->ifindex,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 542d095..232b2c4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -322,7 +322,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 		return NULL;
 	}
 	if (ndev->cnf.forwarding)
-		dev_disable_lro(dev);
+		dev_set_forwarding(dev, 1);
 	/* We refer to the device */
 	dev_hold(dev);
 
@@ -638,8 +638,7 @@ static void dev_forward_change(struct inet6_dev *idev)
 	if (!idev)
 		return;
 	dev = idev->dev;
-	if (idev->cnf.forwarding)
-		dev_disable_lro(dev);
+	dev_set_forwarding(dev, idev->cnf.forwarding ? 1 : -1);
 	if (dev->flags & IFF_MULTICAST) {
 		if (idev->cnf.forwarding) {
 			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbc..b7620ad 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -139,6 +139,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 #ifdef NET_REFCNT_DEBUG
 	pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
 #endif
+	if (idev->cnf.forwarding)
+		dev_set_forwarding(dev, -1);
 	dev_put(dev);
 	if (!idev->dead) {
 		pr_warn("Freeing alive inet6 device %p\n", idev);


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ