[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2d6daa85-97b9-140a-5e92-0d775ba246d0@nvidia.com>
Date: Mon, 1 Nov 2021 12:07:06 +0200
From: Oz Shlomo <ozsh@...dia.com>
To: Baowen Zheng <baowen.zheng@...igine.com>,
Simon Horman <simon.horman@...igine.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Cc: Vlad Buslov <vladbu@...dia.com>,
Jamal Hadi Salim <jhs@...atatu.com>,
Roi Dayan <roid@...dia.com>, Ido Schimmel <idosch@...dia.com>,
Cong Wang <xiyou.wangcong@...il.com>,
Jiri Pirko <jiri@...nulli.us>,
Baowen Zheng <notifications@...hub.com>,
Louis Peens <louis.peens@...igine.com>,
oss-drivers <oss-drivers@...igine.com>
Subject: Re: [RFC/PATCH net-next v3 3/8] flow_offload: allow user to offload
tc action to net device
On 11/1/2021 4:30 AM, Baowen Zheng wrote:
> On 10/31/2021 5:50 PM, Oz Shlomo wrote:
>> On 10/28/2021 2:06 PM, Simon Horman wrote:
>>> From: Baowen Zheng <baowen.zheng@...igine.com>
>>>
>>> Use flow_indr_dev_register/flow_indr_dev_setup_offload to offload tc
>>> action.
>>
>> How will device drivers reference the offloaded actions when offloading a
>> flow?
>> Perhaps the flow_action_entry structure should also include the action index.
>>
> We have set action index in flow_offload_action to offload the action, also there are > already some actions in flow_action_entry include index which we want to offload.
> If the driver wants to support action that needs index, I think it can add the index later,
> it may not include in this patch, WDYT?
What do you mean by "action that needs index"?
Currently only the police and gate actions have an action index parameter.
However, with this series the user can create any action using the tc action API and then reference
it from any filter.
Do you see a reason not to expose the action index as a flow_action_entry attribute?
>>>
>>> We need to call tc_cleanup_flow_action to clean up tc action entry
>>> since in tc_setup_action, some actions may hold dev refcnt, especially
>>> the mirror action.
>>>
>>> Signed-off-by: Baowen Zheng <baowen.zheng@...igine.com>
>>> Signed-off-by: Louis Peens <louis.peens@...igine.com>
>>> Signed-off-by: Simon Horman <simon.horman@...igine.com>
>>> ---
>>> include/linux/netdevice.h | 1 +
>>> include/net/act_api.h | 2 +-
>>> include/net/flow_offload.h | 17 ++++
>>> include/net/pkt_cls.h | 15 ++++
>>> net/core/flow_offload.c | 43 ++++++++--
>>> net/sched/act_api.c | 166 +++++++++++++++++++++++++++++++++++++
>>> net/sched/cls_api.c | 29 ++++++-
>>> 7 files changed, 260 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>> index 3ec42495a43a..9815c3a058e9 100644
>>> --- a/include/linux/netdevice.h
>>> +++ b/include/linux/netdevice.h
>>> @@ -916,6 +916,7 @@ enum tc_setup_type {
>>> TC_SETUP_QDISC_TBF,
>>> TC_SETUP_QDISC_FIFO,
>>> TC_SETUP_QDISC_HTB,
>>> + TC_SETUP_ACT,
>>> };
>>>
>>> /* These structures hold the attributes of bpf state that are being
>>> passed diff --git a/include/net/act_api.h b/include/net/act_api.h
>>> index b5b624c7e488..9eb19188603c 100644
>>> --- a/include/net/act_api.h
>>> +++ b/include/net/act_api.h
>>> @@ -239,7 +239,7 @@ static inline void
>> tcf_action_inc_overlimit_qstats(struct tc_action *a)
>>> void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
>>> u64 drops, bool hw);
>>> int tcf_action_copy_stats(struct sk_buff *, struct tc_action *,
>>> int);
>>> -
>>> +int tcf_action_offload_del(struct tc_action *action);
>>> int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
>>> struct tcf_chain **handle,
>>> struct netlink_ext_ack *newchain); diff --git
>>> a/include/net/flow_offload.h b/include/net/flow_offload.h index
>>> 3961461d9c8b..aa28592fccc0 100644
>>> --- a/include/net/flow_offload.h
>>> +++ b/include/net/flow_offload.h
>>> @@ -552,6 +552,23 @@ struct flow_cls_offload {
>>> u32 classid;
>>> };
>>>
>>> +enum flow_act_command {
>>> + FLOW_ACT_REPLACE,
>>> + FLOW_ACT_DESTROY,
>>> + FLOW_ACT_STATS,
>>> +};
>>> +
>>> +struct flow_offload_action {
>>> + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS
>> process*/
>>> + enum flow_act_command command;
>>> + enum flow_action_id id;
>>> + u32 index;
>>> + struct flow_stats stats;
>>> + struct flow_action action;
>>> +};
>>> +
>>> +struct flow_offload_action *flow_action_alloc(unsigned int
>>> +num_actions);
>>> +
>>> static inline struct flow_rule *
>>> flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
>>> {
>>> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index
>>> 193f88ebf629..922775407257 100644
>>> --- a/include/net/pkt_cls.h
>>> +++ b/include/net/pkt_cls.h
>>> @@ -258,6 +258,9 @@ static inline void tcf_exts_put_net(struct tcf_exts
>> *exts)
>>> for (; 0; (void)(i), (void)(a), (void)(exts))
>>> #endif
>>>
>>> +#define tcf_act_for_each_action(i, a, actions) \
>>> + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
>>> +
>>> static inline void
>>> tcf_exts_stats_update(const struct tcf_exts *exts,
>>> u64 bytes, u64 packets, u64 drops, u64 lastuse, @@ -532,8
>>> +535,19 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
>>> return ifindex == skb->skb_iif;
>>> }
>>>
>>> +#ifdef CONFIG_NET_CLS_ACT
>>> int tc_setup_flow_action(struct flow_action *flow_action,
>>> const struct tcf_exts *exts);
>>> +#else
>>> +static inline int tc_setup_flow_action(struct flow_action *flow_action,
>>> + const struct tcf_exts *exts) {
>>> + return 0;
>>> +}
>>> +#endif
>>> +
>>> +int tc_setup_action(struct flow_action *flow_action,
>>> + struct tc_action *actions[]);
>>> void tc_cleanup_flow_action(struct flow_action *flow_action);
>>>
>>> int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type
>>> type, @@ -554,6 +568,7 @@ int tc_setup_cb_reoffload(struct tcf_block
>> *block, struct tcf_proto *tp,
>>> enum tc_setup_type type, void *type_data,
>>> void *cb_priv, u32 *flags, unsigned int
>> *in_hw_count);
>>> unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
>>> +unsigned int tcf_act_num_actions_single(struct tc_action *act);
>>>
>>> #ifdef CONFIG_NET_CLS_ACT
>>> int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, diff
>>> --git a/net/core/flow_offload.c b/net/core/flow_offload.c index
>>> 6beaea13564a..6676431733ef 100644
>>> --- a/net/core/flow_offload.c
>>> +++ b/net/core/flow_offload.c
>>> @@ -27,6 +27,27 @@ struct flow_rule *flow_rule_alloc(unsigned int
>> num_actions)
>>> }
>>> EXPORT_SYMBOL(flow_rule_alloc);
>>>
>>> +struct flow_offload_action *flow_action_alloc(unsigned int
>>> +num_actions) {
>>> + struct flow_offload_action *fl_action;
>>> + int i;
>>> +
>>> + fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions),
>>> + GFP_KERNEL);
>>> + if (!fl_action)
>>> + return NULL;
>>> +
>>> + fl_action->action.num_entries = num_actions;
>>> + /* Pre-fill each action hw_stats with DONT_CARE.
>>> + * Caller can override this if it wants stats for a given action.
>>> + */
>>> + for (i = 0; i < num_actions; i++)
>>> + fl_action->action.entries[i].hw_stats =
>>> +FLOW_ACTION_HW_STATS_DONT_CARE;
>>> +
>>> + return fl_action;
>>> +}
>>> +EXPORT_SYMBOL(flow_action_alloc);
>>> +
>>> #define FLOW_DISSECTOR_MATCH(__rule, __type, __out)
>> \
>>> const struct flow_match *__m = &(__rule)->match;
>> \
>>> struct flow_dissector *__d = (__m)->dissector;
>> \
>>> @@ -549,19 +570,25 @@ int flow_indr_dev_setup_offload(struct
>> net_device *dev, struct Qdisc *sch,
>>> void (*cleanup)(struct flow_block_cb
>> *block_cb))
>>> {
>>> struct flow_indr_dev *this;
>>> + u32 count = 0;
>>> + int err;
>>>
>>> mutex_lock(&flow_indr_block_lock);
>>> + if (bo) {
>>> + if (bo->command == FLOW_BLOCK_BIND)
>>> + indir_dev_add(data, dev, sch, type, cleanup, bo);
>>> + else if (bo->command == FLOW_BLOCK_UNBIND)
>>> + indir_dev_remove(data);
>>> + }
>>>
>>> - if (bo->command == FLOW_BLOCK_BIND)
>>> - indir_dev_add(data, dev, sch, type, cleanup, bo);
>>> - else if (bo->command == FLOW_BLOCK_UNBIND)
>>> - indir_dev_remove(data);
>>> -
>>> - list_for_each_entry(this, &flow_block_indr_dev_list, list)
>>> - this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
>>> + list_for_each_entry(this, &flow_block_indr_dev_list, list) {
>>> + err = this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
>>> + if (!err)
>>> + count++;
>>> + }
>>>
>>> mutex_unlock(&flow_indr_block_lock);
>>>
>>> - return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
>>> + return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
>>> }
>>> EXPORT_SYMBOL(flow_indr_dev_setup_offload);
>>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c index
>>> 3258da3d5bed..33f2ff885b4b 100644
>>> --- a/net/sched/act_api.c
>>> +++ b/net/sched/act_api.c
>>> @@ -21,6 +21,19 @@
>>> #include <net/pkt_cls.h>
>>> #include <net/act_api.h>
>>> #include <net/netlink.h>
>>> +#include <net/tc_act/tc_pedit.h>
>>> +#include <net/tc_act/tc_mirred.h>
>>> +#include <net/tc_act/tc_vlan.h>
>>> +#include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h>
>>> +#include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_police.h>
>>> +#include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h>
>>> +#include <net/tc_act/tc_ct.h> #include <net/tc_act/tc_mpls.h>
>>> +#include <net/tc_act/tc_gate.h> #include <net/flow_offload.h>
>>>
>>> #ifdef CONFIG_INET
>>> DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
>>> @@ -148,6 +161,7 @@ static int __tcf_action_put(struct tc_action *p, bool
>> bind)
>>> idr_remove(&idrinfo->action_idr, p->tcfa_index);
>>> mutex_unlock(&idrinfo->lock);
>>>
>>> + tcf_action_offload_del(p);
>>> tcf_action_cleanup(p);
>>> return 1;
>>> }
>>> @@ -341,6 +355,7 @@ static int tcf_idr_release_unsafe(struct tc_action *p)
>>> return -EPERM;
>>>
>>> if (refcount_dec_and_test(&p->tcfa_refcnt)) {
>>> + tcf_action_offload_del(p);
>>> idr_remove(&p->idrinfo->action_idr, p->tcfa_index);
>>> tcf_action_cleanup(p);
>>> return ACT_P_DELETED;
>>> @@ -452,6 +467,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo
>> *idrinfo, u32 index)
>>> p->tcfa_index));
>>> mutex_unlock(&idrinfo->lock);
>>>
>>> + tcf_action_offload_del(p);
>>> tcf_action_cleanup(p);
>>> module_put(owner);
>>> return 0;
>>> @@ -1061,6 +1077,154 @@ struct tc_action *tcf_action_init_1(struct net
>> *net, struct tcf_proto *tp,
>>> return ERR_PTR(err);
>>> }
>>>
>>> +static int flow_action_init(struct flow_offload_action *fl_action,
>>> + struct tc_action *act,
>>> + enum flow_act_command cmd,
>>> + struct netlink_ext_ack *extack) {
>>> + if (!fl_action)
>>> + return -EINVAL;
>>> +
>>> + fl_action->extack = extack;
>>> + fl_action->command = cmd;
>>> + fl_action->index = act->tcfa_index;
>>> +
>>> + if (is_tcf_gact_ok(act)) {
>>> + fl_action->id = FLOW_ACTION_ACCEPT;
>>> + } else if (is_tcf_gact_shot(act)) {
>>> + fl_action->id = FLOW_ACTION_DROP;
>>> + } else if (is_tcf_gact_trap(act)) {
>>> + fl_action->id = FLOW_ACTION_TRAP;
>>> + } else if (is_tcf_gact_goto_chain(act)) {
>>> + fl_action->id = FLOW_ACTION_GOTO;
>>> + } else if (is_tcf_mirred_egress_redirect(act)) {
>>> + fl_action->id = FLOW_ACTION_REDIRECT;
>>> + } else if (is_tcf_mirred_egress_mirror(act)) {
>>> + fl_action->id = FLOW_ACTION_MIRRED;
>>> + } else if (is_tcf_mirred_ingress_redirect(act)) {
>>> + fl_action->id = FLOW_ACTION_REDIRECT_INGRESS;
>>> + } else if (is_tcf_mirred_ingress_mirror(act)) {
>>> + fl_action->id = FLOW_ACTION_MIRRED_INGRESS;
>>> + } else if (is_tcf_vlan(act)) {
>>> + switch (tcf_vlan_action(act)) {
>>> + case TCA_VLAN_ACT_PUSH:
>>> + fl_action->id = FLOW_ACTION_VLAN_PUSH;
>>> + break;
>>> + case TCA_VLAN_ACT_POP:
>>> + fl_action->id = FLOW_ACTION_VLAN_POP;
>>> + break;
>>> + case TCA_VLAN_ACT_MODIFY:
>>> + fl_action->id = FLOW_ACTION_VLAN_MANGLE;
>>> + break;
>>> + default:
>>> + return -EOPNOTSUPP;
>>> + }
>>> + } else if (is_tcf_tunnel_set(act)) {
>>> + fl_action->id = FLOW_ACTION_TUNNEL_ENCAP;
>>> + } else if (is_tcf_tunnel_release(act)) {
>>> + fl_action->id = FLOW_ACTION_TUNNEL_DECAP;
>>> + } else if (is_tcf_csum(act)) {
>>> + fl_action->id = FLOW_ACTION_CSUM;
>>> + } else if (is_tcf_skbedit_mark(act)) {
>>> + fl_action->id = FLOW_ACTION_MARK;
>>> + } else if (is_tcf_sample(act)) {
>>> + fl_action->id = FLOW_ACTION_SAMPLE;
>>> + } else if (is_tcf_police(act)) {
>>> + fl_action->id = FLOW_ACTION_POLICE;
>>> + } else if (is_tcf_ct(act)) {
>>> + fl_action->id = FLOW_ACTION_CT;
>>> + } else if (is_tcf_mpls(act)) {
>>> + switch (tcf_mpls_action(act)) {
>>> + case TCA_MPLS_ACT_PUSH:
>>> + fl_action->id = FLOW_ACTION_MPLS_PUSH;
>>> + break;
>>> + case TCA_MPLS_ACT_POP:
>>> + fl_action->id = FLOW_ACTION_MPLS_POP;
>>> + break;
>>> + case TCA_MPLS_ACT_MODIFY:
>>> + fl_action->id = FLOW_ACTION_MPLS_MANGLE;
>>> + break;
>>> + default:
>>> + return -EOPNOTSUPP;
>>> + }
>>> + } else if (is_tcf_skbedit_ptype(act)) {
>>> + fl_action->id = FLOW_ACTION_PTYPE;
>>> + } else if (is_tcf_skbedit_priority(act)) {
>>> + fl_action->id = FLOW_ACTION_PRIORITY;
>>> + } else if (is_tcf_gate(act)) {
>>> + fl_action->id = FLOW_ACTION_GATE;
>>> + } else {
>>> + return -EOPNOTSUPP;
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int tcf_action_offload_cmd(struct flow_offload_action *fl_act,
>>> + struct netlink_ext_ack *extack) {
>>> + int err;
>>> +
>>> + if (IS_ERR(fl_act))
>>> + return PTR_ERR(fl_act);
>>> +
>>> + err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT,
>>> + fl_act, NULL, NULL);
>>> + if (err < 0)
>>> + return err;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +/* offload the tc command after inserted */ static int
>>> +tcf_action_offload_add(struct tc_action *action,
>>> + struct netlink_ext_ack *extack) {
>>> + struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
>>> + [0] = action,
>>> + };
>>> + struct flow_offload_action *fl_action;
>>> + int err = 0;
>>> +
>>> + fl_action = flow_action_alloc(tcf_act_num_actions_single(action));
>>> + if (!fl_action)
>>> + return -EINVAL;
>>> +
>>> + err = flow_action_init(fl_action, action, FLOW_ACT_REPLACE, extack);
>>> + if (err)
>>> + goto fl_err;
>>> +
>>> + err = tc_setup_action(&fl_action->action, actions);
>>> + if (err) {
>>> + NL_SET_ERR_MSG_MOD(extack,
>>> + "Failed to setup tc actions for offload\n");
>>> + goto fl_err;
>>> + }
>>> +
>>> + err = tcf_action_offload_cmd(fl_action, extack);
>>> + tc_cleanup_flow_action(&fl_action->action);
>>> +
>>> +fl_err:
>>> + kfree(fl_action);
>>> +
>>> + return err;
>>> +}
>>> +
>>> +int tcf_action_offload_del(struct tc_action *action) {
>>> + struct flow_offload_action fl_act;
>>> + int err = 0;
>>> +
>>> + if (!action)
>>> + return -EINVAL;
>>> +
>>> + err = flow_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL);
>>> + if (err)
>>> + return err;
>>> +
>>> + return tcf_action_offload_cmd(&fl_act, NULL); }
>>> +
>>> /* Returns numbers of initialized actions or negative error. */
>>>
>>> int tcf_action_init(struct net *net, struct tcf_proto *tp, struct
>>> nlattr *nla, @@ -1103,6 +1267,8 @@ int tcf_action_init(struct net *net,
>> struct tcf_proto *tp, struct nlattr *nla,
>>> sz += tcf_action_fill_size(act);
>>> /* Start from index 0 */
>>> actions[i - 1] = act;
>>> + if (!(flags & TCA_ACT_FLAGS_BIND))
>>> + tcf_action_offload_add(act, extack);
>>
>> Why is this restricted to actions created without the TCA_ACT_FLAGS_BIND
>> flag?
>> How are actions instantiated by the filters different from those that are
>> created by "tc actions"?
>>
> Our patch aims to offload tc action that is created independent of any flow. It is usually
> offloaded when it is added or replaced.
> This patch is to implement a process of reoffloading the actions when driver is
> inserted or removed, so it will still offload the independent actions.
I see.
>>> }
>>>
>>> /* We have to commit them all together, because if any error
>>> happened in diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>>> index 2ef8f5a6205a..351d93988b8b 100644
>>> --- a/net/sched/cls_api.c
>>> +++ b/net/sched/cls_api.c
>>> @@ -3544,8 +3544,8 @@ static enum flow_action_hw_stats
>> tc_act_hw_stats(u8 hw_stats)
>>> return hw_stats;
>>> }
>>>
>>> -int tc_setup_flow_action(struct flow_action *flow_action,
>>> - const struct tcf_exts *exts)
>>> +int tc_setup_action(struct flow_action *flow_action,
>>> + struct tc_action *actions[])
>>> {
>>> struct tc_action *act;
>>> int i, j, k, err = 0;
>>> @@ -3554,11 +3554,11 @@ int tc_setup_flow_action(struct flow_action
>> *flow_action,
>>> BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE !=
>> FLOW_ACTION_HW_STATS_IMMEDIATE);
>>> BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED !=
>>> FLOW_ACTION_HW_STATS_DELAYED);
>>>
>>> - if (!exts)
>>> + if (!actions)
>>> return 0;
>>>
>>> j = 0;
>>> - tcf_exts_for_each_action(i, act, exts) {
>>> + tcf_act_for_each_action(i, act, actions) {
>>> struct flow_action_entry *entry;
>>>
>>> entry = &flow_action->entries[j];
>>> @@ -3725,7 +3725,19 @@ int tc_setup_flow_action(struct flow_action
>> *flow_action,
>>> spin_unlock_bh(&act->tcfa_lock);
>>> goto err_out;
>>> }
>>> +EXPORT_SYMBOL(tc_setup_action);
>>> +
>>> +#ifdef CONFIG_NET_CLS_ACT
>>> +int tc_setup_flow_action(struct flow_action *flow_action,
>>> + const struct tcf_exts *exts)
>>> +{
>>> + if (!exts)
>>> + return 0;
>>> +
>>> + return tc_setup_action(flow_action, exts->actions); }
>>> EXPORT_SYMBOL(tc_setup_flow_action);
>>> +#endif
>>>
>>> unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
>>> {
>>> @@ -3743,6 +3755,15 @@ unsigned int tcf_exts_num_actions(struct
>> tcf_exts *exts)
>>> }
>>> EXPORT_SYMBOL(tcf_exts_num_actions);
>>>
>>> +unsigned int tcf_act_num_actions_single(struct tc_action *act) {
>>> + if (is_tcf_pedit(act))
>>> + return tcf_pedit_nkeys(act);
>>> + else
>>> + return 1;
>>> +}
>>> +EXPORT_SYMBOL(tcf_act_num_actions_single);
>>> +
>>> #ifdef CONFIG_NET_CLS_ACT
>>> static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
>>> u32 *p_block_index,
>>>
Powered by blists - more mailing lists