[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260204132428.224465-2-p@1g4.org>
Date: Wed, 04 Feb 2026 13:24:53 +0000
From: Paul Moses <p@....org>
To: Victor Nogueira <victor@...atatu.com>, Jamal Hadi Salim <jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>, Jiri Pirko <jiri@...nulli.us>
Cc: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, linux-kernel@...r.kernel.org, Paul Moses <p@....org>, stable@...r.kernel.org
Subject: [PATCH net v4 1/1] net/sched: act_gate: protect parameters with RCU on replace
Convert act_gate parameters to an RCU-protected snapshot. Allocate a new
snapshot on CREATE and REPLACE, swap it under tcf_lock, and free the old
snapshot via call_rcu() to avoid races with the hrtimer callback and the
dump path.
Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@...r.kernel.org
Signed-off-by: Paul Moses <p@....org>
---
include/net/tc_act/tc_gate.h | 29 +++-
net/sched/act_gate.c | 300 ++++++++++++++++++++++++++---------
2 files changed, 246 insertions(+), 83 deletions(-)
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index c1a67149c6b62..5f4fc407f7bf6 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
s32 tcfg_clockid;
size_t num_entries;
struct list_head entries;
+ struct rcu_head rcu;
};
#define GATE_ACT_GATE_OPEN BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
struct tcf_gate {
struct tc_action common;
- struct tcf_gate_params param;
+ struct tcf_gate_params __rcu *param;
u8 current_gate_status;
ktime_t current_close_time;
u32 current_entry_octets;
@@ -51,47 +52,60 @@ struct tcf_gate {
#define to_gate(a) ((struct tcf_gate *)a)
+static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a)
+{
+ struct tcf_gate *gact = to_gate(a);
+
+ return rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+}
+
static inline s32 tcf_gate_prio(const struct tc_action *a)
{
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
s32 tcfg_prio;
- tcfg_prio = to_gate(a)->param.tcfg_priority;
+ tcfg_prio = p->tcfg_priority;
return tcfg_prio;
}
static inline u64 tcf_gate_basetime(const struct tc_action *a)
{
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
u64 tcfg_basetime;
- tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+ tcfg_basetime = p->tcfg_basetime;
return tcfg_basetime;
}
static inline u64 tcf_gate_cycletime(const struct tc_action *a)
{
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
u64 tcfg_cycletime;
- tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+ tcfg_cycletime = p->tcfg_cycletime;
return tcfg_cycletime;
}
static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
{
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
u64 tcfg_cycletimeext;
- tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+ tcfg_cycletimeext = p->tcfg_cycletime_ext;
return tcfg_cycletimeext;
}
static inline u32 tcf_gate_num_entries(const struct tc_action *a)
{
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
u32 num_entries;
- num_entries = to_gate(a)->param.num_entries;
+ num_entries = p->num_entries;
return num_entries;
}
@@ -100,12 +114,11 @@ static inline struct action_gate_entry
*tcf_gate_get_list(const struct tc_action *a)
{
struct action_gate_entry *oe;
- struct tcf_gate_params *p;
+ struct tcf_gate_params *p = tcf_gate_params_locked(a);
struct tcfg_gate_entry *entry;
u32 num_entries;
int i = 0;
- p = &to_gate(a)->param;
num_entries = p->num_entries;
list_for_each_entry(entry, &p->entries, list)
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c1f75f2727576..e93b77edd0694 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void tcf_gate_params_free_rcu(struct rcu_head *head);
+
+static void gate_get_start_time(struct tcf_gate *gact,
+ const struct tcf_gate_params *param,
+ ktime_t *start)
{
- struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
u64 n;
@@ -69,12 +72,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
{
struct tcf_gate *gact = container_of(timer, struct tcf_gate,
hitimer);
- struct tcf_gate_params *p = &gact->param;
+ struct tcf_gate_params *p;
struct tcfg_gate_entry *next;
ktime_t close_time, now;
spin_lock(&gact->tcf_lock);
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
next = gact->next_entry;
/* cycle start, clear pending bit, clear total octets */
@@ -225,6 +230,42 @@ static void release_entry_list(struct list_head *entries)
}
}
+static int tcf_gate_copy_entries(struct tcf_gate_params *dst,
+ const struct tcf_gate_params *src,
+ struct netlink_ext_ack *extack)
+{
+ struct tcfg_gate_entry *entry, *new;
+ int i = 0;
+
+ list_for_each_entry(entry, &src->entries, list) {
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ goto err_free;
+ }
+
+ new->index = entry->index;
+ new->gate_state = entry->gate_state;
+ new->interval = entry->interval;
+ new->ipv = entry->ipv;
+ new->maxoctets = entry->maxoctets;
+
+ list_add_tail(&new->list, &dst->entries);
+ i++;
+ }
+
+ dst->num_entries = i;
+ return 0;
+
+err_free:
+ list_for_each_entry_safe(new, entry, &dst->entries, list) {
+ list_del(&new->list);
+ kfree(new);
+ }
+ dst->num_entries = 0;
+ return -ENOMEM;
+}
+
static int parse_gate_list(struct nlattr *list_attr,
struct tcf_gate_params *sched,
struct netlink_ext_ack *extack)
@@ -243,7 +284,7 @@ static int parse_gate_list(struct nlattr *list_attr,
continue;
}
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
NL_SET_ERR_MSG(extack, "Not enough memory for entry");
err = -ENOMEM;
@@ -266,6 +307,7 @@ static int parse_gate_list(struct nlattr *list_attr,
release_list:
release_entry_list(&sched->entries);
+ sched->num_entries = 0;
return err;
}
@@ -274,36 +316,64 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
enum tk_offsets tko, s32 clockid,
bool do_init)
{
+ struct tcf_gate_params *p;
+
if (!do_init) {
- if (basetime == gact->param.tcfg_basetime &&
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+ if (basetime == p->tcfg_basetime &&
tko == gact->tk_offset &&
- clockid == gact->param.tcfg_clockid)
+ clockid == p->tcfg_clockid)
return;
-
- spin_unlock_bh(&gact->tcf_lock);
- hrtimer_cancel(&gact->hitimer);
- spin_lock_bh(&gact->tcf_lock);
}
- gact->param.tcfg_basetime = basetime;
- gact->param.tcfg_clockid = clockid;
gact->tk_offset = tko;
hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
}
+static int gate_clockid_to_offset(s32 clockid, enum tk_offsets *off,
+ struct netlink_ext_ack *extack)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ *off = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ *off = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ *off = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ *off = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
+ const struct tcf_gate_params *cur_p = NULL;
enum tk_offsets tk_offset = TK_OFFS_TAI;
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
+ struct tcf_gate_params *old_p, *p;
struct tcf_chain *goto_ch = NULL;
u64 cycletime = 0, basetime = 0;
- struct tcf_gate_params *p;
+ bool clockid_changed = false;
+ bool use_old_entries = false;
+ bool list_provided = false;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
+ u64 cycletime_ext = 0;
+ int parsed = -ENOENT;
struct tc_gate *parm;
int ret = 0, err;
u32 gflags = 0;
@@ -323,23 +393,9 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_CLOCKID]) {
clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- return -EINVAL;
- }
+ err = gate_clockid_to_offset(clockid, &tk_offset, extack);
+ if (err)
+ return err;
}
parm = nla_data(tb[TCA_GATE_PARMS]);
@@ -376,48 +432,128 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
gact = to_gate(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&gact->param.entries);
-
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
- if (err < 0)
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ err = -ENOMEM;
goto release_idr;
+ }
+ INIT_LIST_HEAD(&p->entries);
- spin_lock_bh(&gact->tcf_lock);
- p = &gact->param;
+ list_provided = !!tb[TCA_GATE_ENTRY_LIST];
- if (tb[TCA_GATE_CYCLE_TIME])
- cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+ if (list_provided) {
+ parsed = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (parsed < 0) {
+ err = parsed;
+ goto release_mem;
+ }
+ }
- if (tb[TCA_GATE_ENTRY_LIST]) {
- err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (!list_provided || parsed == 0) {
+ if (ret == ACT_P_CREATED) {
+ NL_SET_ERR_MSG(extack, "The entry list is empty");
+ err = -EINVAL;
+ goto release_mem;
+ }
+ use_old_entries = true;
+ }
+
+ if (use_old_entries) {
+ cur_p = rcu_dereference_protected(gact->param,
+ lockdep_rtnl_is_held());
+ if (!cur_p) {
+ NL_SET_ERR_MSG(extack, "Missing schedule entries");
+ err = -EINVAL;
+ goto release_mem;
+ }
+
+ if (!tb[TCA_GATE_PRIORITY])
+ prio = cur_p->tcfg_priority;
+
+ if (!tb[TCA_GATE_BASE_TIME])
+ basetime = cur_p->tcfg_basetime;
+
+ if (!tb[TCA_GATE_FLAGS])
+ gflags = cur_p->tcfg_flags;
+
+ if (!tb[TCA_GATE_CLOCKID]) {
+ clockid = cur_p->tcfg_clockid;
+ err = gate_clockid_to_offset(clockid, &tk_offset, extack);
+ if (err)
+ goto release_mem;
+ }
+
+ if (!tb[TCA_GATE_CYCLE_TIME])
+ cycletime = cur_p->tcfg_cycletime;
+
+ if (!tb[TCA_GATE_CYCLE_TIME_EXT])
+ cycletime_ext = cur_p->tcfg_cycletime_ext;
+
+ err = tcf_gate_copy_entries(p, cur_p, extack);
if (err < 0)
- goto chain_put;
+ goto release_mem;
}
+ p->tcfg_priority = prio;
+ p->tcfg_flags = gflags;
+
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+
if (!cycletime) {
struct tcfg_gate_entry *entry;
ktime_t cycle = 0;
- list_for_each_entry(entry, &p->entries, list)
- cycle = ktime_add_ns(cycle, entry->interval);
- cycletime = cycle;
+ if (list_provided && !use_old_entries) {
+ list_for_each_entry(entry, &p->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+ cycletime = cycle;
+ } else if (cur_p) {
+ cycletime = cur_p->tcfg_cycletime;
+ }
if (!cycletime) {
err = -EINVAL;
- goto chain_put;
+ goto release_mem;
}
}
p->tcfg_cycletime = cycletime;
if (tb[TCA_GATE_CYCLE_TIME_EXT])
- p->tcfg_cycletime_ext =
- nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ p->tcfg_cycletime_ext = cycletime_ext;
- gate_setup_timer(gact, basetime, tk_offset, clockid,
- ret == ACT_P_CREATED);
- p->tcfg_priority = prio;
- p->tcfg_flags = gflags;
- gate_get_start_time(gact, &start);
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_mem;
+
+ if (ret != ACT_P_CREATED) {
+ cur_p = rcu_dereference_protected(gact->param,
+ lockdep_rtnl_is_held());
+ if (cur_p && clockid != cur_p->tcfg_clockid) {
+ hrtimer_cancel(&gact->hitimer);
+ clockid_changed = true;
+ }
+ }
+
+ spin_lock_bh(&gact->tcf_lock);
+ if (ret == ACT_P_CREATED) {
+ gate_setup_timer(gact, basetime, tk_offset, clockid, true);
+ } else {
+ cur_p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+ if (!cur_p) {
+ err = -EINVAL;
+ goto chain_put;
+ }
+ if (clockid_changed)
+ gate_setup_timer(gact, basetime, tk_offset, clockid, false);
+ }
+ p->tcfg_basetime = basetime;
+ p->tcfg_clockid = clockid;
+ gate_get_start_time(gact, p, &start);
+
+ old_p = rcu_replace_pointer(gact->param, p,
+ lockdep_is_held(&gact->tcf_lock));
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -434,6 +570,9 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (old_p)
+ call_rcu(&old_p->rcu, tcf_gate_params_free_rcu);
+
return ret;
chain_put:
@@ -441,26 +580,36 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+release_mem:
+ release_entry_list(&p->entries);
+ kfree(p);
release_idr:
/* action is not inserted in any list: it's safe to init hitimer
* without taking tcf_lock.
*/
if (ret == ACT_P_CREATED)
- gate_setup_timer(gact, gact->param.tcfg_basetime,
- gact->tk_offset, gact->param.tcfg_clockid,
- true);
+ gate_setup_timer(gact, basetime, tk_offset, clockid, true);
tcf_idr_release(*a, bind);
return err;
}
+static void tcf_gate_params_free_rcu(struct rcu_head *head)
+{
+ struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu);
+
+ release_entry_list(&p->entries);
+ kfree(p);
+}
+
static void tcf_gate_cleanup(struct tc_action *a)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
- p = &gact->param;
hrtimer_cancel(&gact->hitimer);
- release_entry_list(&p->entries);
+ p = rcu_replace_pointer(gact->param, NULL, lockdep_rtnl_is_held());
+ if (p)
+ call_rcu(&p->rcu, tcf_gate_params_free_rcu);
}
static int dumping_entry(struct sk_buff *skb,
@@ -499,65 +648,66 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_gate *gact = to_gate(a);
+ struct tcfg_gate_entry *entry;
+ struct tcf_gate_params *p;
+ struct nlattr *entry_list;
struct tc_gate opt = {
.index = gact->tcf_index,
.refcnt = refcount_read(&gact->tcf_refcnt) - ref,
.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
};
- struct tcfg_gate_entry *entry;
- struct tcf_gate_params *p;
- struct nlattr *entry_list;
struct tcf_t t;
- spin_lock_bh(&gact->tcf_lock);
- opt.action = gact->tcf_action;
-
- p = &gact->param;
+ opt.action = READ_ONCE(gact->tcf_action);
if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ rcu_read_lock();
+ p = rcu_dereference(gact->param);
+
if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
p->tcfg_basetime, TCA_GATE_PAD))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
p->tcfg_cycletime, TCA_GATE_PAD))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
p->tcfg_cycletime_ext, TCA_GATE_PAD))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
if (!entry_list)
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
list_for_each_entry(entry, &p->entries, list) {
if (dumping_entry(skb, entry) < 0)
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
}
nla_nest_end(skb, entry_list);
+ rcu_read_unlock();
tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
goto nla_put_failure;
- spin_unlock_bh(&gact->tcf_lock);
return skb->len;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
- spin_unlock_bh(&gact->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
--
2.52.GIT
Powered by blists - more mailing lists