[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211016084910.4029084-7-bigeasy@linutronix.de>
Date: Sat, 16 Oct 2021 10:49:07 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: netdev@...r.kernel.org, netfilter-devel@...r.kernel.org
Cc: Jakub Kicinski <kuba@...nel.org>,
"David S. Miller" <davem@...emloft.net>,
Pablo Neira Ayuso <pablo@...filter.org>,
Jozsef Kadlecsik <kadlec@...filter.org>,
Florian Westphal <fw@...len.de>,
Jamal Hadi Salim <jhs@...atatu.com>,
Cong Wang <xiyou.wangcong@...il.com>,
Jiri Pirko <jiri@...nulli.us>,
"Ahmed S. Darwish" <a.darwish@...utronix.de>,
Eric Dumazet <edumazet@...gle.com>,
Thomas Gleixner <tglx@...utronix.de>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH net-next 6/9] net: sched: Protect Qdisc::bstats with u64_stats
From: "Ahmed S. Darwish" <a.darwish@...utronix.de>
The not-per-CPU variant of qdisc tc (traffic control) statistics,
Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running
sequence counter.
This sequence counter is used for reliably protecting bstats reads from
parallel writes. Meanwhile, the seqcount's write section covers a much
wider area than bstats update: qdisc_run_begin() => qdisc_run_end().
That read/write section asymmetry can lead to needless retries of the
read section. To prepare for removing the Qdisc::running sequence
counter altogether, introduce a u64_stats sync point inside bstats
instead.
Modify _bstats_update() to start/end the bstats u64_stats write
section.
For bisectability, and finer commits granularity, the bstats read
section is still protected with a Qdisc::running read/retry loop and
qdisc_run_begin/end() still starts/ends that seqcount write section.
Once all call sites are modified to use _bstats_update(), the
Qdisc::running seqcount will be removed and bstats read/retry loop will
be modified to utilize the internal u64_stats sync point.
Note, using u64_stats implies no sequence counter protection for 64-bit
architectures. This can lead to the statistics "packets" vs. "bytes"
values getting out of sync on rare occasions. The individual values will
still be valid.
[bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.]
Signed-off-by: Ahmed S. Darwish <a.darwish@...utronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
include/net/gen_stats.h | 2 ++
include/net/sch_generic.h | 2 ++
net/core/gen_estimator.c | 2 +-
net/core/gen_stats.c | 14 ++++++++++++--
net/netfilter/xt_RATEEST.c | 1 +
net/sched/act_api.c | 2 ++
net/sched/sch_atm.c | 1 +
net/sched/sch_cbq.c | 1 +
net/sched/sch_drr.c | 1 +
net/sched/sch_ets.c | 2 +-
net/sched/sch_generic.c | 1 +
net/sched/sch_gred.c | 4 +++-
net/sched/sch_hfsc.c | 1 +
net/sched/sch_htb.c | 7 +++++--
net/sched/sch_mq.c | 2 +-
net/sched/sch_mqprio.c | 5 +++--
net/sched/sch_qfq.c | 1 +
17 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index d47155f5db5d7..304d792f79776 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -11,6 +11,7 @@
struct gnet_stats_basic_packed {
__u64 bytes;
__u64 packets;
+ struct u64_stats_sync syncp;
};
struct gnet_stats_basic_cpu {
@@ -34,6 +35,7 @@ struct gnet_dump {
struct tc_stats tc_stats;
};
+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b);
int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d, int padattr);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7bc2d30b5c067..d7746aea3cecf 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -852,8 +852,10 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
__u64 bytes, __u32 packets)
{
+ u64_stats_update_begin(&bstats->syncp);
bstats->bytes += bytes;
bstats->packets += packets;
+ u64_stats_update_end(&bstats->syncp);
}
static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 205df8b5116e5..64978e77368f4 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -62,7 +62,7 @@ struct net_rate_estimator {
static void est_fetch_counters(struct net_rate_estimator *e,
struct gnet_stats_basic_packed *b)
{
- memset(b, 0, sizeof(*b));
+ gnet_stats_basic_packed_init(b);
if (e->stats_lock)
spin_lock(e->stats_lock);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 6ec11289140b6..f2e12fe7112b1 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -18,7 +18,7 @@
#include <linux/gen_stats.h>
#include <net/netlink.h>
#include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@@ -114,6 +114,15 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
+{
+ b->bytes = 0;
+ b->packets = 0;
+ u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_packed_init);
+
static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu)
{
@@ -167,8 +176,9 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
struct gnet_stats_basic_packed *b,
int type)
{
- struct gnet_stats_basic_packed bstats = {0};
+ struct gnet_stats_basic_packed bstats;
+ gnet_stats_basic_packed_init(&bstats);
gnet_stats_add_basic(running, &bstats, cpu, b);
if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 0d5c422f87452..d5200725ca62c 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
if (!est)
goto err1;
+ gnet_stats_basic_packed_init(&est->bstats);
strlcpy(est->name, info->name, sizeof(est->name));
spin_lock_init(&est->lock);
est->refcnt = 1;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 7dd3a2dc5fa40..0302dad42df14 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
if (!p->cpu_qstats)
goto err3;
}
+ gnet_stats_basic_packed_init(&p->tcfa_bstats);
+ gnet_stats_basic_packed_init(&p->tcfa_bstats_hw);
spin_lock_init(&p->tcfa_lock);
p->tcfa_index = index;
p->tcfa_tm.install = jiffies;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 7d8518176b45a..c8e1771383f9a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
INIT_LIST_HEAD(&p->flows);
INIT_LIST_HEAD(&p->link.list);
+ gnet_stats_basic_packed_init(&p->link.bstats);
list_add(&p->link.list, &p->flows);
p->link.q = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, sch->handle, extack);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index e0da15530f0e9..d01f6ec315f87 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
+ gnet_stats_basic_packed_init(&cl->bstats);
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 642cd179b7a75..319906e19a6ba 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ gnet_stats_basic_packed_init(&cl->bstats);
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index ed86b7021f6d0..83693107371f9 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->classes[i].qdisc = NULL;
q->classes[i].quantum = 0;
q->classes[i].deficit = 0;
- memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats));
+ gnet_stats_basic_packed_init(&q->classes[i].bstats);
memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
}
return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8c64a552a64fe..ef27ff3ddee4f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
qdisc_skb_head_init(&sch->q);
+ gnet_stats_basic_packed_init(&sch->bstats);
spin_lock_init(&sch->q.lock);
if (ops->static_flags & TCQ_F_CPUSTATS) {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 621dc6afde8f3..2ddcbb2efdbbc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -364,9 +364,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
hw_stats->handle = sch->handle;
hw_stats->parent = sch->parent;
- for (i = 0; i < MAX_DPs; i++)
+ for (i = 0; i < MAX_DPs; i++) {
+ gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]);
if (table->tab[i])
hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+ }
ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
/* Even if driver returns failure adjust the stats - in case offload
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ac30cca035d..ff6ff54806fcd 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
if (err)
return err;
+ gnet_stats_basic_packed_init(&q->root.bstats);
q->root.cl_common.classid = sch->handle;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5067a6e5d4fde..2e805b17efcf9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
struct htb_class *c;
unsigned int i;
- memset(&cl->bstats, 0, sizeof(cl->bstats));
+ gnet_stats_basic_packed_init(&cl->bstats);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@@ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (cl->leaf.q)
cl->bstats = cl->leaf.q->bstats;
else
- memset(&cl->bstats, 0, sizeof(cl->bstats));
+ gnet_stats_basic_packed_init(&cl->bstats);
cl->bstats.bytes += cl->bstats_bias.bytes;
cl->bstats.packets += cl->bstats_bias.packets;
} else {
@@ -1849,6 +1849,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
+ gnet_stats_basic_packed_init(&cl->bstats);
+ gnet_stats_basic_packed_init(&cl->bstats_bias);
+
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 9d58ecb4e80c6..704e14a58f09d 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
unsigned int ntx;
sch->q.qlen = 0;
- memset(&sch->bstats, 0, sizeof(sch->bstats));
+ gnet_stats_basic_packed_init(&sch->bstats);
memset(&sch->qstats, 0, sizeof(sch->qstats));
/* MQ supports lockless qdiscs. However, statistics accounting needs
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 57427b40f0d2e..fe6b4a178fc9f 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
unsigned int ntx, tc;
sch->q.qlen = 0;
- memset(&sch->bstats, 0, sizeof(sch->bstats));
+ gnet_stats_basic_packed_init(&sch->bstats);
memset(&sch->qstats, 0, sizeof(sch->qstats));
/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -500,10 +500,11 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
int i;
__u32 qlen;
struct gnet_stats_queue qstats = {0};
- struct gnet_stats_basic_packed bstats = {0};
+ struct gnet_stats_basic_packed bstats;
struct net_device *dev = qdisc_dev(sch);
struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
+ gnet_stats_basic_packed_init(&bstats);
/* Drop lock here it will be reclaimed before touching
* statistics this is required because the d->lock we
* hold here is the look on dev_queue->qdisc_sleeping
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 58a9d42b52b8f..b6d989b69324d 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ gnet_stats_basic_packed_init(&cl->bstats);
cl->common.classid = classid;
cl->deficit = lmax;
--
2.33.0
Powered by blists - more mailing lists