[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200311173356.38181-3-petrm@mellanox.com>
Date: Wed, 11 Mar 2020 19:33:52 +0200
From: Petr Machata <petrm@...lanox.com>
To: netdev@...r.kernel.org
Cc: Petr Machata <petrm@...lanox.com>,
Jakub Kicinski <kuba@...nel.org>,
Roman Mashak <mrv@...atatu.com>, jhs@...atatu.com,
xiyou.wangcong@...il.com, davem@...emloft.net, jiri@...lanox.com,
mlxsw@...lanox.com
Subject: [PATCH net-next v2 2/6] net: sched: Allow extending set of supported RED flags
The qdiscs RED, GRED, SFQ and CHOKE use different subsets of the same pool
of global RED flags. These are passed in tc_red_qopt.flags. However none of
these qdiscs validate the flag field, and just copy it over wholesale to
internal structures, and later dump it back. (An exception is GRED, which
does validate for VQs -- however not for the main setup.)
A broken userspace can therefore configure a qdisc with arbitrary
unsupported flags, and later expect to see the flags on qdisc dump. The
current ABI therefore allows storage of several bits of custom data to
qdisc instances of the types mentioned above. How many bits, depends on
which flags are meaningful for the qdisc in question. E.g. SFQ recognizes
flags ECN and HARDDROP, and the rest is not interpreted.
If SFQ ever needs to support ADAPTATIVE, it needs another way of doing it,
and at the same time it needs to retain the possibility to store 6 bits of
uninterpreted data. Likewise RED, which adds a new flag later in this
patchset.
To that end, this patch adds a new function, red_get_flags(), to split the
passed flags of RED-like qdiscs to flags and user bits, and to validate the
flags that are passed in. It further adds a new attribute, TCA_RED_FLAGS,
to pass arbitrary flags.
Signed-off-by: Petr Machata <petrm@...lanox.com>
---
Notes:
v2:
- This patch is new.
include/net/red.h | 25 +++++++++++++++++++++++++
include/uapi/linux/pkt_sched.h | 16 ++++++++++++++++
net/sched/sch_red.c | 24 +++++++++++++++++++++---
3 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/include/net/red.h b/include/net/red.h
index 9665582c4687..5718d2b25637 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -179,6 +179,31 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
return true;
}
+static inline bool red_get_flags(unsigned char flags,
+ unsigned char historic_mask,
+ struct nlattr *flags_attr,
+ unsigned int supported_mask,
+ unsigned int *p_flags, unsigned char *p_userbits,
+ struct netlink_ext_ack *extack)
+{
+ if (flags && flags_attr) {
+ NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
+ return false;
+ }
+
+ *p_flags = flags & historic_mask;
+ if (flags_attr)
+ *p_flags |= nla_get_u32(flags_attr);
+
+ if (*p_flags & ~supported_mask) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported RED flags specified");
+ return false;
+ }
+
+ *p_userbits = flags & ~historic_mask;
+ return true;
+}
+
static inline void red_set_parms(struct red_parms *p,
u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
u8 Scell_log, u8 *stab, u32 max_P)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index bbe791b24168..277df546e1a9 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -256,6 +256,7 @@ enum {
TCA_RED_PARMS,
TCA_RED_STAB,
TCA_RED_MAX_P,
+ TCA_RED_FLAGS, /* u32 */
__TCA_RED_MAX,
};
@@ -268,12 +269,27 @@ struct tc_red_qopt {
unsigned char Wlog; /* log(W) */
unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
unsigned char Scell_log; /* cell size for idle damping */
+
+ /* This field can be used for flags that a RED-like qdisc has
+ * historically supported. E.g. when configuring RED, it can be used for
+ * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN,
+ * HARDDROP. Etc. Because this field has not been validated, and is
+ * copied back on dump, any bits besides those to which a given qdisc
+ * has assigned a historical meaning need to be considered for free use
+ * by userspace tools.
+ *
+ * Any further flags need to be passed differently, e.g. through an
+ * attribute (such as TCA_RED_FLAGS above). Such attribute should allow
+ * passing both recent and historic flags in one value.
+ */
unsigned char flags;
#define TC_RED_ECN 1
#define TC_RED_HARDDROP 2
#define TC_RED_ADAPTATIVE 4
};
+#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
+
struct tc_red_xstats {
__u32 early; /* Early drops */
__u32 pdrop; /* Drops due to queue limits */
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 1695421333e3..61d7c5a61279 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -35,7 +35,11 @@
struct red_sched_data {
u32 limit; /* HARD maximal queue length */
- unsigned char flags;
+
+ u32 flags;
+ /* Non-flags in tc_red_qopt.flags. */
+ unsigned char userbits;
+
struct timer_list adapt_timer;
struct Qdisc *sch;
struct red_parms parms;
@@ -44,6 +48,8 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
+#define RED_SUPPORTED_FLAGS TC_RED_HISTORIC_FLAGS
+
static inline int red_use_ecn(struct red_sched_data *q)
{
return q->flags & TC_RED_ECN;
@@ -186,6 +192,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
+ [TCA_RED_FLAGS] = { .type = NLA_U32 },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -195,6 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
struct tc_red_qopt *ctl;
+ unsigned char userbits;
+ u32 flags;
int err;
u32 max_P;
@@ -216,6 +225,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
return -EINVAL;
+ if (!red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
+ tb[TCA_RED_FLAGS], RED_SUPPORTED_FLAGS,
+ &flags, &userbits, extack))
+ return -EINVAL;
+
if (ctl->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
extack);
@@ -227,7 +241,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_lock(sch);
- q->flags = ctl->flags;
+ q->flags = flags;
+ q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
qdisc_tree_flush_backlog(q->qdisc);
@@ -302,7 +317,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *opts = NULL;
struct tc_red_qopt opt = {
.limit = q->limit,
- .flags = q->flags,
+ .flags = ((q->flags & TC_RED_HISTORIC_FLAGS) |
+ q->userbits),
.qth_min = q->parms.qth_min >> q->parms.Wlog,
.qth_max = q->parms.qth_max >> q->parms.Wlog,
.Wlog = q->parms.Wlog,
@@ -321,6 +337,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
goto nla_put_failure;
+ if (q->flags & ~TC_RED_HISTORIC_FLAGS)
+ nla_put_u32(skb, TCA_RED_FLAGS, q->flags);
return nla_nest_end(skb, opts);
nla_put_failure:
--
2.20.1
Powered by blists - more mailing lists