[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e7942894834908523a65147c2cce026537059f87.1738940816.git.pabeni@redhat.com>
Date: Fri, 7 Feb 2025 17:23:45 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Eric Dumazet <edumazet@...gle.com>,
Kuniyuki Iwashima <kuniyu@...zon.com>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Simon Horman <horms@...nel.org>,
Neal Cardwell <ncardwell@...gle.com>,
David Ahern <dsahern@...nel.org>
Subject: [RFC PATCH 2/2] udp: avoid false sharing via protocol specific set_tsflags
After commit 5d4cc87414c5 ("net: reorganize "struct sock" fields"),
the sk_tsflags field shares the same cacheline with sk_forward_alloc.
The UDP protocol does not acquire the sock lock in the RX path;
forward allocations are protected via the receive queue spinlock.
Due to the above, under high packet rate traffic, when the BH and the
user-space process run on different CPUs, UDP packet reception will
experience a cache miss while accessing sk_tsflags.
Similarly to commit f796feabb9f5 ("udp: add local "peek offset enabled"
flag"), add a new field in the udp_sock struct to store a copy of the
sk_tsflags value in a cache friendly manner.
Use the newly introduced protocol op to sync-up the new field at every
sk_tsflags update.
With this patch applied, on an AMD EPYC server with i40e NICs, I
measured a 10% performance improvement in small-packet UDP flood
tests - possibly a larger delta could be observed with more
recent H/W.
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
include/linux/udp.h | 12 ++++++++++++
include/net/sock.h | 14 ++++++++++----
net/ipv4/udp.c | 3 ++-
net/ipv6/udp.c | 3 ++-
4 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0807e21cfec9..b186ac20fd38 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -101,6 +101,9 @@ struct udp_sock {
/* Cache friendly copy of sk->sk_peek_off >= 0 */
bool peeking_with_offset;
+
+ /* Cache friendly copy of sk_tsflags & TSFLAGS_ANY */
+ bool tsflags_any;
};
#define udp_test_bit(nr, sk) \
@@ -125,6 +128,15 @@ static inline int udp_set_peek_off(struct sock *sk, int val)
return 0;
}
+static inline int udp_set_tsflags(struct sock *sk, int val)
+{
+ WRITE_ONCE(udp_sk(sk)->tsflags_any, !!(val & TSFLAGS_ANY));
+ if (val & SOF_TIMESTAMPING_OPT_ID &&
+ !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID))
+ atomic_set(&sk->sk_tskey, 0);
+ return 0;
+}
+
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
udp_assign_bit(NO_CHECK6_TX, sk, val);
diff --git a/include/net/sock.h b/include/net/sock.h
index 282dd23b90dc..5767c2dace2a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2660,8 +2660,8 @@ void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
-static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static inline void sock_do_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, bool tsflags_any)
{
#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \
@@ -2670,8 +2670,7 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_RECV_CMSGS ||
- READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+ if (sk->sk_flags & FLAGS_RECV_CMSGS || tsflags_any)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
@@ -2679,6 +2678,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
sock_write_timestamp(sk, 0);
}
+static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return sock_do_recv_cmsgs(msg, sk, skb,
+ READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY);
+}
+
void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags);
/**
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a9bb9ce5438e..001bb4579330 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2084,7 +2084,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
- sock_recv_cmsgs(msg, sk, skb);
+ sock_do_recv_cmsgs(msg, sk, skb, udp_sk(sk)->tsflags_any);
/* Copy the address. */
if (sin) {
@@ -3191,6 +3191,7 @@ struct proto udp_prot = {
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
+ .tsflags = udp_set_tsflags,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.splice_eof = udp_splice_eof,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6ea438b5c75..28e2eb331ceb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -532,7 +532,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
- sock_recv_cmsgs(msg, sk, skb);
+ sock_do_recv_cmsgs(msg, sk, skb, udp_sk(sk)->tsflags_any);
/* Copy the address. */
if (msg->msg_name) {
@@ -1917,6 +1917,7 @@ struct proto udpv6_prot = {
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
+ .tsflags = udp_set_tsflags,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.splice_eof = udpv6_splice_eof,
--
2.48.1
Powered by blists - more mailing lists