[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <176424684236.194326.12739516403715190883.stgit@firesoul>
Date: Thu, 27 Nov 2025 13:34:02 +0100
From: Jesper Dangaard Brouer <hawk@...nel.org>
To: netfilter-devel@...r.kernel.org, Pablo Neira Ayuso <pablo@...filter.org>,
Florian Westphal <fw@...len.de>
Cc: Jesper Dangaard Brouer <hawk@...nel.org>, netdev@...r.kernel.org,
phil@....cc, Eric Dumazet <eric.dumazet@...il.com>,
"David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, kernel-team@...udflare.com,
mfleming@...udflare.com, matt@...dmodwrite.com
Subject: [PATCH nf-next RFC 2/3] xt_statistic: do nth-mode accounting per CPU
The atomic cmpxchg operations for nth-mode matching are a scaling
concern on our production servers with 192 CPUs. The iptables rules that
sample one in every 10000 packets exist on the INPUT and OUTPUT chains.
Thus, these nth-counter rules are hit for every packet on the system, with
high concurrency.
Our use-case is statistical sampling, where we don't need an accurate packet
count across all CPUs in the system. Thus, we implement per-CPU counters for
the nth-mode match.
This replaces the implementation behind XT_STATISTIC_MODE_NTH, to avoid having
to change userspace tooling. We keep the atomic variant and move it under the
XT_STATISTIC_MODE_NTH_ATOMIC mode, which userspace can easily be extended to
leverage if necessary.
Signed-off-by: Jesper Dangaard Brouer <hawk@...nel.org>
---
include/uapi/linux/netfilter/xt_statistic.h | 1 +
net/netfilter/xt_statistic.c | 37 ++++++++++++++++++++++++++-
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/netfilter/xt_statistic.h b/include/uapi/linux/netfilter/xt_statistic.h
index bbce6fcb26e3..f399dd27ff61 100644
--- a/include/uapi/linux/netfilter/xt_statistic.h
+++ b/include/uapi/linux/netfilter/xt_statistic.h
@@ -7,6 +7,7 @@
enum xt_statistic_mode {
XT_STATISTIC_MODE_RANDOM,
XT_STATISTIC_MODE_NTH,
+ XT_STATISTIC_MODE_NTH_ATOMIC,
__XT_STATISTIC_MODE_MAX
};
#define XT_STATISTIC_MODE_MAX (__XT_STATISTIC_MODE_MAX - 1)
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index d352c171f24d..165bff0a76e5 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -17,6 +17,7 @@
struct xt_statistic_priv {
atomic_t count;
+ u32 __percpu *cnt_pcpu;
} ____cacheline_aligned_in_smp;
MODULE_LICENSE("GPL");
@@ -63,6 +64,21 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
ret = !ret;
break;
case XT_STATISTIC_MODE_NTH:
+ pkt_cnt = gso_pkt_cnt(skb);
+ do {
+ match = false;
+ oval = this_cpu_read(*priv->cnt_pcpu);
+ nval = oval + pkt_cnt;
+ if (nval > info->u.nth.every) {
+ match = true;
+ nval = nval - info->u.nth.every - 1;
+ nval = min(nval, info->u.nth.every);
+ }
+ } while (this_cpu_cmpxchg(*priv->cnt_pcpu, oval, nval) != oval);
+ if (match)
+ ret = !ret;
+ break;
+ case XT_STATISTIC_MODE_NTH_ATOMIC:
pkt_cnt = gso_pkt_cnt(skb);
do {
match = false;
@@ -85,6 +101,10 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int statistic_mt_check(const struct xt_mtchk_param *par)
{
struct xt_statistic_info *info = par->matchinfo;
+ struct xt_statistic_priv *priv;
+ u32 *this_cpu;
+ u32 nth_count;
+ int cpu;
if (info->mode > XT_STATISTIC_MODE_MAX ||
info->flags & ~XT_STATISTIC_MASK)
@@ -93,7 +113,21 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
if (info->master == NULL)
return -ENOMEM;
- atomic_set(&info->master->count, info->u.nth.count);
+ priv = info->master;
+
+ priv->cnt_pcpu = alloc_percpu(u32);
+ if (!priv->cnt_pcpu) {
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ /* Userspace specifies start nth.count value */
+ nth_count = info->u.nth.count;
+ for_each_possible_cpu(cpu) {
+ this_cpu = per_cpu_ptr(priv->cnt_pcpu, cpu);
+ (*this_cpu) = nth_count;
+ }
+ atomic_set(&priv->count, nth_count);
return 0;
}
@@ -102,6 +136,7 @@ static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_statistic_info *info = par->matchinfo;
+ free_percpu(info->master->cnt_pcpu);
kfree(info->master);
}
Powered by blists - more mailing lists