lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e7942894834908523a65147c2cce026537059f87.1738940816.git.pabeni@redhat.com>
Date: Fri,  7 Feb 2025 17:23:45 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: Willem de Bruijn <willemdebruijn.kernel@...il.com>,
	Eric Dumazet <edumazet@...gle.com>,
	Kuniyuki Iwashima <kuniyu@...zon.com>,
	"David S. Miller" <davem@...emloft.net>,
	Jakub Kicinski <kuba@...nel.org>,
	Simon Horman <horms@...nel.org>,
	Neal Cardwell <ncardwell@...gle.com>,
	David Ahern <dsahern@...nel.org>
Subject: [RFC PATCH 2/2] udp: avoid false sharing via protocol specific set_tsflags

After commit 5d4cc87414c5 ("net: reorganize "struct sock" fields"),
the sk_tsflags field shares the same cacheline with sk_forward_alloc.

The UDP protocol does not acquire the sock lock in the RX path;
forward allocations are protected via the receive queue spinlock.

Due to the above, under high packet rate traffic, when the BH and the
user-space process run on different CPUs, UDP packet reception will
experience a cache miss while accessing sk_tsflags.

Similarly to commit f796feabb9f5 ("udp: add local "peek offset enabled"
flag"), add a new field in the udp_sock struct to cache, in a cache
friendly manner, whether any of the TSFLAGS_ANY bits is set in sk_tsflags.

Use the newly introduced protocol op to sync-up the new field at every
sk_tsflags update.

With this patch applied, on an AMD EPYC server with i40e NICs, I
measured a 10% performance improvement in small-packet UDP flood
performance tests - possibly a larger delta could be observed with more
recent H/W.

Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
 include/linux/udp.h | 12 ++++++++++++
 include/net/sock.h  | 14 ++++++++++----
 net/ipv4/udp.c      |  3 ++-
 net/ipv6/udp.c      |  3 ++-
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0807e21cfec9..b186ac20fd38 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -101,6 +101,9 @@ struct udp_sock {
 
 	/* Cache friendly copy of sk->sk_peek_off >= 0 */
 	bool		peeking_with_offset;
+
+	/* Cache friendly copy of sk_tsflags & TSFLAGS_ANY */
+	bool		tsflags_any;
 };
 
 #define udp_test_bit(nr, sk)			\
@@ -125,6 +128,15 @@ static inline int udp_set_peek_off(struct sock *sk, int val)
 	return 0;
 }
 
+static inline int udp_set_tsflags(struct sock *sk, int val)
+{
+	WRITE_ONCE(udp_sk(sk)->tsflags_any, !!(val & TSFLAGS_ANY));
+	if (val & SOF_TIMESTAMPING_OPT_ID &&
+	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID))
+		atomic_set(&sk->sk_tskey, 0);
+	return 0;
+}
+
 static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
 {
 	udp_assign_bit(NO_CHECK6_TX, sk, val);
diff --git a/include/net/sock.h b/include/net/sock.h
index 282dd23b90dc..5767c2dace2a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2660,8 +2660,8 @@ void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 		       struct sk_buff *skb);
 
 #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
-static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
-				   struct sk_buff *skb)
+static inline void sock_do_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+				      struct sk_buff *skb, bool tsflags_any)
 {
 #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL)			| \
 			   (1UL << SOCK_RCVTSTAMP)			| \
@@ -2670,8 +2670,7 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 #define TSFLAGS_ANY	  (SOF_TIMESTAMPING_SOFTWARE			| \
 			   SOF_TIMESTAMPING_RAW_HARDWARE)
 
-	if (sk->sk_flags & FLAGS_RECV_CMSGS ||
-	    READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+	if (sk->sk_flags & FLAGS_RECV_CMSGS || tsflags_any)
 		__sock_recv_cmsgs(msg, sk, skb);
 	else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
 		sock_write_timestamp(sk, skb->tstamp);
@@ -2679,6 +2678,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 		sock_write_timestamp(sk, 0);
 }
 
+static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	return sock_do_recv_cmsgs(msg, sk, skb,
+				  READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY);
+}
+
 void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags);
 
 /**
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a9bb9ce5438e..001bb4579330 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2084,7 +2084,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 		UDP_INC_STATS(sock_net(sk),
 			      UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_cmsgs(msg, sk, skb);
+	sock_do_recv_cmsgs(msg, sk, skb, udp_sk(sk)->tsflags_any);
 
 	/* Copy the address. */
 	if (sin) {
@@ -3191,6 +3191,7 @@ struct proto udp_prot = {
 	.destroy		= udp_destroy_sock,
 	.setsockopt		= udp_setsockopt,
 	.getsockopt		= udp_getsockopt,
+	.tsflags		= udp_set_tsflags,
 	.sendmsg		= udp_sendmsg,
 	.recvmsg		= udp_recvmsg,
 	.splice_eof		= udp_splice_eof,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6ea438b5c75..28e2eb331ceb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -532,7 +532,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (!peeking)
 		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
 
-	sock_recv_cmsgs(msg, sk, skb);
+	sock_do_recv_cmsgs(msg, sk, skb, udp_sk(sk)->tsflags_any);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
@@ -1917,6 +1917,7 @@ struct proto udpv6_prot = {
 	.destroy		= udpv6_destroy_sock,
 	.setsockopt		= udpv6_setsockopt,
 	.getsockopt		= udpv6_getsockopt,
+	.tsflags		= udp_set_tsflags,
 	.sendmsg		= udpv6_sendmsg,
 	.recvmsg		= udpv6_recvmsg,
 	.splice_eof		= udpv6_splice_eof,
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ