lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 20 Jun 2024 15:19:10 -0700
From: Yan Zhai <yan@...udflare.com>
To: netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
	Alexei Starovoitov <ast@...nel.org>,
	Daniel Borkmann <daniel@...earbox.net>,
	Jesper Dangaard Brouer <hawk@...nel.org>,
	John Fastabend <john.fastabend@...il.com>,
	Willem de Bruijn <willemb@...gle.com>,
	Simon Horman <horms@...nel.org>, Florian Westphal <fw@...len.de>,
	Mina Almasry <almasrymina@...gle.com>,
	Abhishek Chauhan <quic_abchauha@...cinc.com>,
	David Howells <dhowells@...hat.com>,
	Alexander Lobakin <aleksander.lobakin@...el.com>,
	David Ahern <dsahern@...nel.org>,
	Richard Gobert <richardbgobert@...il.com>,
	Antoine Tenart <atenart@...nel.org>, Yan Zhai <yan@...udflare.com>,
	Felix Fietkau <nbd@....name>,
	Soheil Hassas Yeganeh <soheil@...gle.com>,
	Pavel Begunkov <asml.silence@...il.com>,
	Lorenzo Bianconi <lorenzo@...nel.org>,
	Thomas Weißschuh <linux@...ssschuh.net>,
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
	bpf@...r.kernel.org
Subject: [RFC net-next 1/9] skb: introduce gro_disabled bit

Software GRO is currently controlled by a single switch, i.e.

  ethtool -K dev gro on|off

However, this is not always desired. When GRO is enabled, even traffic
that the kernel cannot GRO still has to run through the GRO receive
handlers with no benefit.

There are also scenarios where turning off GRO is a requirement. For
example, our production environment has a scenario where a TC egress hook
may add multiple encapsulation headers to forwarded skbs for load
balancing and isolation purposes. The encapsulation is implemented via
BPF. This is where the problem arises: there is no way to properly offload
a double-encapsulated packet, since skb only has network_header and
inner_network_header to track one layer of encapsulation, but not two.
On the other hand, not all the traffic through this device needs double
encapsulation. Yet, as a result, we have to turn off GRO completely for
any ingress device.

Introduce a bit on the skb so that the GRO engine can be told to skip GRO
for this skb, rather than GRO having to be all-or-nothing for all traffic.

Signed-off-by: Yan Zhai <yan@...udflare.com>
---
 include/linux/netdevice.h |  9 +++++++--
 include/linux/skbuff.h    | 10 ++++++++++
 net/Kconfig               | 10 ++++++++++
 net/core/gro.c            |  2 +-
 net/core/gro_cells.c      |  2 +-
 net/core/skbuff.c         |  4 ++++
 6 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c83b390191d4..2ca0870b1221 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2415,11 +2415,16 @@ struct net_device {
 	((dev)->devlink_port = (port));				\
 })
 
-static inline bool netif_elide_gro(const struct net_device *dev)
+static inline bool netif_elide_gro(const struct sk_buff *skb)
 {
-	if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
+	if (!(skb->dev->features & NETIF_F_GRO) || skb->dev->xdp_prog)
 		return true;
+
+#ifdef CONFIG_SKB_GRO_CONTROL
+	return skb->gro_disabled;
+#else
 	return false;
+#endif
 }
 
 #define	NETDEV_ALIGN		32
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f4cda3fbdb75..48b10ece95b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1008,6 +1008,9 @@ struct sk_buff {
 #if IS_ENABLED(CONFIG_IP_SCTP)
 	__u8			csum_not_inet:1;
 #endif
+#ifdef CONFIG_SKB_GRO_CONTROL
+	__u8			gro_disabled:1;
+#endif
 
 #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
 	__u16			tc_index;	/* traffic control index */
@@ -1215,6 +1218,13 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb)
 #endif
 }
 
+static inline void skb_disable_gro(struct sk_buff *skb)
+{
+#ifdef CONFIG_SKB_GRO_CONTROL
+	skb->gro_disabled = 1;
+#endif
+}
+
 /**
  * skb_unref - decrement the skb's reference count
  * @skb: buffer
diff --git a/net/Kconfig b/net/Kconfig
index 9fe65fa26e48..47d1ee92df15 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -289,6 +289,16 @@ config MAX_SKB_FRAGS
 	  and in drivers using build_skb().
 	  If unsure, say 17.
 
+config SKB_GRO_CONTROL
+	bool "allow disable GRO on per-packet basis"
+	default y
+	help
+	  By default GRO can only be enabled or disabled per network device.
+	  This can be cumbersome for certain scenarios.
+	  Toggling this option will allow disabling GRO for selected packets,
+	  e.g. by XDP programs, so that it is more flexible.
+	  Extra overhead should be minimal.
+
 config RPS
 	bool "Receive packet steering"
 	depends on SMP && SYSFS
diff --git a/net/core/gro.c b/net/core/gro.c
index b3b43de1a650..46232a0d1983 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -476,7 +476,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	enum gro_result ret;
 	int same_flow;
 
-	if (netif_elide_gro(skb->dev))
+	if (netif_elide_gro(skb))
 		goto normal;
 
 	gro_list_prepare(&gro_list->list, skb);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index ff8e5b64bf6b..1bf15783300f 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -20,7 +20,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto drop;
 
-	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) {
+	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(skb)) {
 		res = netif_rx(skb);
 		goto unlock;
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2315c088e91d..82bd297921c1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6030,6 +6030,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	ipvs_reset(skb);
 	skb->mark = 0;
 	skb_clear_tstamp(skb);
+#ifdef CONFIG_SKB_GRO_CONTROL
+	/* hand back GRO control to next netns */
+	skb->gro_disabled = 0;
+#endif
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
 
-- 
2.30.2



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ