[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211208143408.7047-3-xiangxia.m.yue@gmail.com>
Date: Wed, 8 Dec 2021 22:34:08 +0800
From: xiangxia.m.yue@...il.com
To: netdev@...r.kernel.org
Cc: Tonghao Zhang <xiangxia.m.yue@...il.com>,
Jamal Hadi Salim <jhs@...atatu.com>,
Cong Wang <xiyou.wangcong@...il.com>,
Jiri Pirko <jiri@...nulli.us>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jonathan Lemon <jonathan.lemon@...il.com>,
Eric Dumazet <edumazet@...gle.com>,
Alexander Lobakin <alobakin@...me>,
Paolo Abeni <pabeni@...hat.com>,
Talal Ahmad <talalahmad@...gle.com>,
Kevin Hao <haokexin@...il.com>,
Ilias Apalodimas <ilias.apalodimas@...aro.org>,
Kees Cook <keescook@...omium.org>,
Kumar Kartikeya Dwivedi <memxor@...il.com>,
Antoine Tenart <atenart@...nel.org>,
Wei Wang <weiwan@...gle.com>, Arnd Bergmann <arnd@...db.de>
Subject: [net-next v2 2/2] net: sched: support hash/classid selecting tx queue
From: Tonghao Zhang <xiangxia.m.yue@...il.com>
This patch allows users to select a range of Tx queues, from
queue A to queue B, instead of a single queue_mapping. Either the
skb hash or the cgroup classid is used to pick a queue within that
range, so that packets are load-balanced across queues A to B.
$ tc filter ... action skbedit queue_mapping hash-type normal 0 4
"skbedit queue_mapping QUEUE_MAPPING" [0] is enhanced with two
flags: SKBEDIT_F_QUEUE_MAPPING_HASH, SKBEDIT_F_QUEUE_MAPPING_CLASSID.
Each bound of the range is an unsigned 16-bit value in decimal format.
[0]: https://man7.org/linux/man-pages/man8/tc-skbedit.8.html
Cc: Jamal Hadi Salim <jhs@...atatu.com>
Cc: Cong Wang <xiyou.wangcong@...il.com>
Cc: Jiri Pirko <jiri@...nulli.us>
Cc: "David S. Miller" <davem@...emloft.net>
Cc: Jakub Kicinski <kuba@...nel.org>
Cc: Jonathan Lemon <jonathan.lemon@...il.com>
Cc: Eric Dumazet <edumazet@...gle.com>
Cc: Alexander Lobakin <alobakin@...me>
Cc: Paolo Abeni <pabeni@...hat.com>
Cc: Talal Ahmad <talalahmad@...gle.com>
Cc: Kevin Hao <haokexin@...il.com>
Cc: Ilias Apalodimas <ilias.apalodimas@...aro.org>
Cc: Kees Cook <keescook@...omium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@...il.com>
Cc: Antoine Tenart <atenart@...nel.org>
Cc: Wei Wang <weiwan@...gle.com>
Cc: Arnd Bergmann <arnd@...db.de>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@...il.com>
---
include/net/tc_act/tc_skbedit.h | 1 +
include/uapi/linux/tc_act/tc_skbedit.h | 6 +++
net/sched/act_skbedit.c | 58 ++++++++++++++++++++++++--
3 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 00bfee70609e..ee96e0fa6566 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -17,6 +17,7 @@ struct tcf_skbedit_params {
u32 mark;
u32 mask;
u16 queue_mapping;
+ u16 mapping_mod;
u16 ptype;
struct rcu_head rcu;
};
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 800e93377218..8df288078dde 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -29,6 +29,11 @@
#define SKBEDIT_F_PTYPE 0x8
#define SKBEDIT_F_MASK 0x10
#define SKBEDIT_F_INHERITDSFIELD 0x20
+#define SKBEDIT_F_QUEUE_MAPPING_HASH 0x40
+#define SKBEDIT_F_QUEUE_MAPPING_CLASSID 0x80
+
+#define SKBEDIT_F_QUEUE_MAPPING_HASH_MASK (SKBEDIT_F_QUEUE_MAPPING_HASH | \
+ SKBEDIT_F_QUEUE_MAPPING_CLASSID)
struct tc_skbedit {
tc_gen;
@@ -45,6 +50,7 @@ enum {
TCA_SKBEDIT_PTYPE,
TCA_SKBEDIT_MASK,
TCA_SKBEDIT_FLAGS,
+ TCA_SKBEDIT_QUEUE_MAPPING_MAX,
__TCA_SKBEDIT_MAX
};
#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 498feedad70a..355b43999a4a 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/cls_cgroup.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/ip.h>
@@ -23,6 +24,25 @@
static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
+static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
+ struct sk_buff *skb)
+{
+ u16 queue_mapping = params->queue_mapping;
+ u16 mapping_mod = params->mapping_mod;
+ u32 hash;
+
+ /* Fixed queue unless a hash flag is set; mod 0 would divide by zero. */
+ if (!(params->flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK) || !mapping_mod)
+ return netdev_cap_txqueue(skb->dev, queue_mapping);
+
+ if (params->flags & SKBEDIT_F_QUEUE_MAPPING_CLASSID)
+ hash = jhash_1word(task_get_classid(skb), 0);
+ else /* SKBEDIT_F_QUEUE_MAPPING_HASH is the only other mask bit */
+ hash = skb_get_hash(skb);
+ queue_mapping += hash % mapping_mod; /* base queue + hashed offset */
+ return netdev_cap_txqueue(skb->dev, queue_mapping);
+}
+
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -57,10 +77,9 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
break;
}
}
- if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
- skb->dev->real_num_tx_queues > params->queue_mapping) {
+ if (params->flags & SKBEDIT_F_QUEUE_MAPPING) {
netdev_xmit_skip_txqueue();
- skb_set_queue_mapping(skb, params->queue_mapping);
+ skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
}
if (params->flags & SKBEDIT_F_MARK) {
skb->mark &= ~params->mask;
@@ -94,6 +113,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) },
+ [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -110,6 +130,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
+ u16 mapping_mod = 0;
bool exists = false;
int ret = 0, err;
u32 index;
@@ -157,6 +178,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
flags |= SKBEDIT_F_INHERITDSFIELD;
+ if (*pure_flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK) {
+ u16 *queue_mapping_max;
+
+ if (!tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX])
+ return -EINVAL;
+
+ if (!tb[TCA_SKBEDIT_QUEUE_MAPPING])
+ return -EINVAL;
+
+ queue_mapping_max =
+ nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
+
+ if (*queue_mapping_max < *queue_mapping)
+ return -EINVAL;
+
+ mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+ flags |= *pure_flags &
+ SKBEDIT_F_QUEUE_MAPPING_HASH_MASK;
+ }
}
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
@@ -206,8 +246,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->flags = flags;
if (flags & SKBEDIT_F_PRIORITY)
params_new->priority = *priority;
- if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ if (flags & SKBEDIT_F_QUEUE_MAPPING) {
params_new->queue_mapping = *queue_mapping;
+ params_new->mapping_mod = mapping_mod;
+ }
if (flags & SKBEDIT_F_MARK)
params_new->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
@@ -274,6 +316,14 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
goto nla_put_failure;
if (params->flags & SKBEDIT_F_INHERITDSFIELD)
pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+ if (params->flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK) {
+ if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
+ params->queue_mapping + params->mapping_mod - 1))
+ goto nla_put_failure;
+
+ pure_flags |= params->flags &
+ SKBEDIT_F_QUEUE_MAPPING_HASH_MASK;
+ }
if (pure_flags != 0 &&
nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
goto nla_put_failure;
--
2.27.0
Powered by blists - more mailing lists