Message-ID: <20250825195947.4073595-4-edumazet@google.com>
Date: Mon, 25 Aug 2025 19:59:47 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com,
Willem de Bruijn <willemb@...gle.com>, Kuniyuki Iwashima <kuniyu@...gle.com>,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next 3/3] net: add new sk->sk_drops1 field

sk->sk_drops can be heavily contended when it is
updated from many cpus.

Instead of using a per-cpu data structure, which would be
too expensive, add a second sk->sk_drops1 field and make
sk_drops_inc() NUMA aware.

This patch adds 64 bytes per socket.

For hosts with more than two memory nodes, sk_drops_inc()
might not be optimal; it can be refined later.
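
For reference, below is a minimal userspace sketch of the split-counter
idea, using C11 atomics. The struct and function names (drops0, drops1,
drops_inc, drops_read) are illustrative only, numa_node_id() is a stub
here (the kernel helper of the same name is used in the hunks below),
and the 64-byte alignment stands in for ____cacheline_aligned_in_smp.

/* Userspace sketch of the split drops counter; not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

struct drops {
	/* Keep the two counters on separate cache lines so that cpus
	 * on different memory nodes do not bounce the same line. */
	_Alignas(64) atomic_int drops0;
	_Alignas(64) atomic_int drops1;
};

static int numa_node_id(void)	/* stand-in for the kernel helper */
{
	return 0;
}

static void drops_inc(struct drops *d)
{
	if (numa_node_id() % 2)
		atomic_fetch_add(&d->drops1, 1);
	else
		atomic_fetch_add(&d->drops0, 1);
}

static int drops_read(struct drops *d)
{
	return atomic_load(&d->drops0) + atomic_load(&d->drops1);
}

int main(void)
{
	struct drops d = { 0 };

	drops_inc(&d);
	printf("drops=%d\n", drops_read(&d));	/* drops=1 */
	return 0;
}

On a dual-node machine, node parity maps each node to its own counter,
and the read side simply sums both, as sk_drops_read() does below.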
Tested with the following stress test, sending about 11 Mpps
to a dual-socket AMD EPYC 7B13 64-Core host:

super_netperf 20 -t UDP_STREAM -H DUT -l10 -- -n -P,1000 -m 120

Note: due to socket lookup, only one UDP socket receives
the packets on the DUT.

Then measure the receiver (DUT) behavior. We can see that both
the consumer and the BH handlers process more packets per second.

Before:
nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams 855592 0.0
Udp6InErrors 5621467 0.0
Udp6RcvbufErrors 5621467 0.0

After:
nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams 914537 0.0
Udp6InErrors 6888487 0.0
Udp6RcvbufErrors 6888487 0.0

Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/net/sock.h | 20 +++++++++++++++++++
.../selftests/bpf/progs/bpf_iter_netlink.c | 3 ++-
.../selftests/bpf/progs/bpf_iter_udp4.c | 2 +-
.../selftests/bpf/progs/bpf_iter_udp6.c | 2 +-
4 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index f40e3c4883be32c8282694ab215bcf79eb87cbd7..318169eb1a3d40eefac50147012551614abc6f7a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -282,6 +282,7 @@ struct sk_filter;
* @sk_err_soft: errors that don't cause failure but are the cause of a
* persistent failure not just 'timed out'
* @sk_drops: raw/udp drops counter
+ * @sk_drops1: second drops counter
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
@@ -571,6 +572,11 @@ struct sock {
atomic_t sk_drops ____cacheline_aligned_in_smp;
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+#if defined(CONFIG_NUMA)
+ atomic_t sk_drops1 ____cacheline_aligned_in_smp;
+#else
+ atomic_t sk_drops1;
+#endif
};
struct sock_bh_locked {
@@ -2684,17 +2690,31 @@ struct sock_skb_cb {
static inline void sk_drops_inc(struct sock *sk)
{
+#if defined(CONFIG_NUMA)
+ int n = numa_node_id() % 2;
+
+ if (n)
+ atomic_inc(&sk->sk_drops1);
+ else
+ atomic_inc(&sk->sk_drops);
+#else
atomic_inc(&sk->sk_drops);
+#endif
}
static inline int sk_drops_read(const struct sock *sk)
{
+#if defined(CONFIG_NUMA)
+ return atomic_read(&sk->sk_drops) + atomic_read(&sk->sk_drops1);
+#else
return atomic_read(&sk->sk_drops);
+#endif
}
static inline void sk_drops_reset(struct sock *sk)
{
atomic_set(&sk->sk_drops, 0);
+ atomic_set(&sk->sk_drops1, 0);
}
static inline void
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 00b2ceae81fb0914f2de3634eb342004e8bc3c5b..31ad9fcc6022d5d31b9c6a35daacaad7c887a51f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -57,7 +57,8 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
inode = SOCK_INODE(sk);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
}
- BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+ BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n",
+ s->sk_drops.counter + s->sk_drops1.counter, ino);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index ffbd4b116d17ffbb9f14440c788e50490fb0f4e0..192ab5693a7131c1ec5879e539651c21f6f3c9ae 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -64,7 +64,7 @@ int dump_udp4(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ inet->sk.sk_drops.counter + inet->sk.sk_drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 47ff7754f4fda4c9db92fbf1dc2e6a68f044174e..5170bdf458fa1b9a4eea9240fbaa5934182a7776 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ inet->sk.sk_drops.counter + inet->sk.sk_drops1.counter);
return 0;
}
--
2.51.0.261.g7ce5a0a67e-goog