[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BN3PR0701MB14121D6C7A5365FEFCA8686B892D0@BN3PR0701MB1412.namprd07.prod.outlook.com>
Date: Sun, 19 Nov 2017 19:37:49 +0000
From: "Chopra, Manish" <Manish.Chopra@...ium.com>
To: Jiri Pirko <jiri@...nulli.us>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: "davem@...emloft.net" <davem@...emloft.net>,
"jhs@...atatu.com" <jhs@...atatu.com>,
"xiyou.wangcong@...il.com" <xiyou.wangcong@...il.com>,
"saeedm@...lanox.com" <saeedm@...lanox.com>,
"matanb@...lanox.com" <matanb@...lanox.com>,
"leonro@...lanox.com" <leonro@...lanox.com>,
"mlxsw@...lanox.com" <mlxsw@...lanox.com>,
"David.Laight@...LAB.COM" <David.Laight@...LAB.COM>,
"gerlitz.or@...il.com" <gerlitz.or@...il.com>
Subject: RE: [patch net-next v2 2/4] net: sched: introduce per-egress action
device callbacks
> -----Original Message-----
> From: netdev-owner@...r.kernel.org [mailto:netdev-owner@...r.kernel.org]
> On Behalf Of Jiri Pirko
> Sent: Wednesday, October 11, 2017 1:11 PM
> To: netdev@...r.kernel.org
> Cc: davem@...emloft.net; jhs@...atatu.com; xiyou.wangcong@...il.com;
> saeedm@...lanox.com; matanb@...lanox.com; leonro@...lanox.com;
> mlxsw@...lanox.com; David.Laight@...LAB.COM; gerlitz.or@...il.com
> Subject: [patch net-next v2 2/4] net: sched: introduce per-egress action device
> callbacks
>
> From: Jiri Pirko <jiri@...lanox.com>
>
> Introduce infrastructure that allows drivers to register callbacks that are called
> whenever tc would offload inserted rule and specified device acts as tc action
> egress device.
>
> Signed-off-by: Jiri Pirko <jiri@...lanox.com>
> ---
> v1->v2:
> - take rtnl for register/unregister
> ---
> include/net/act_api.h | 34 ++++++++
> include/net/pkt_cls.h | 2 +
> net/sched/act_api.c | 220
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> net/sched/cls_api.c | 30 +++++++
> 4 files changed, 286 insertions(+)
>
> diff --git a/include/net/act_api.h b/include/net/act_api.h index
> 900168a..f5e8c90 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct
> tc_action *a, u64 bytes, #endif }
>
> +typedef int tc_setup_cb_t(enum tc_setup_type type,
> + void *type_data, void *cb_priv);
> +
> +#ifdef CONFIG_NET_CLS_ACT
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv); void
> +tc_setup_cb_egdev_unregister(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv); int
> +tc_setup_cb_egdev_call(const struct net_device *dev,
> + enum tc_setup_type type, void *type_data,
> + bool err_stop);
> +#else
> +static inline
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + return 0;
> +}
> +
> +static inline
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv) { }
> +
> +static inline
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> + enum tc_setup_type type, void *type_data,
> + bool err_stop)
> +{
> + return 0;
> +}
> +#endif
> +
> #endif
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index
> e80edd8..6f8149c 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts
> *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int
> tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
> struct net_device **hw_dev);
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> + void *type_data, bool err_stop);
>
> /**
> * struct tcf_pkt_info - packet information diff --git a/net/sched/act_api.c
> b/net/sched/act_api.c index da6fa82..ac97db9 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -21,6 +21,8 @@
> #include <linux/kmod.h>
> #include <linux/err.h>
> #include <linux/module.h>
> +#include <linux/rhashtable.h>
> +#include <linux/list.h>
> #include <net/net_namespace.h>
> #include <net/sock.h>
> #include <net/sch_generic.h>
> @@ -1249,8 +1251,226 @@ static int tc_dump_action(struct sk_buff *skb,
> struct netlink_callback *cb)
> return skb->len;
> }
>
> +struct tcf_action_net {
> + struct rhashtable egdev_ht;
> +};
> +
> +static unsigned int tcf_action_net_id;
> +
> +struct tcf_action_egdev_cb {
> + struct list_head list;
> + tc_setup_cb_t *cb;
> + void *cb_priv;
> +};
> +
> +struct tcf_action_egdev {
> + struct rhash_head ht_node;
> + const struct net_device *dev;
> + unsigned int refcnt;
> + struct list_head cb_list;
> +};
> +
> +static const struct rhashtable_params tcf_action_egdev_ht_params = {
> + .key_offset = offsetof(struct tcf_action_egdev, dev),
> + .head_offset = offsetof(struct tcf_action_egdev, ht_node),
> + .key_len = sizeof(const struct net_device *), };
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_lookup(const struct net_device *dev) {
> + struct net *net = dev_net(dev);
> + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> + return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
> + tcf_action_egdev_ht_params);
> +}
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_get(const struct net_device *dev) {
> + struct tcf_action_egdev *egdev;
> + struct tcf_action_net *tan;
> +
> + egdev = tcf_action_egdev_lookup(dev);
> + if (egdev)
> + goto inc_ref;
> +
> + egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
> + if (!egdev)
> + return NULL;
> + INIT_LIST_HEAD(&egdev->cb_list);
> + tan = net_generic(dev_net(dev), tcf_action_net_id);
> + rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
> + tcf_action_egdev_ht_params);
> +
> +inc_ref:
> + egdev->refcnt++;
> + return egdev;
> +}
> +
> +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) {
> + struct tcf_action_net *tan;
> +
> + if (--egdev->refcnt)
> + return;
> + tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
> + rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
> + tcf_action_egdev_ht_params);
> + kfree(egdev);
> +}
> +
> +static struct tcf_action_egdev_cb *
> +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + struct tcf_action_egdev_cb *egdev_cb;
> +
> + list_for_each_entry(egdev_cb, &egdev->cb_list, list)
> + if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
> + return egdev_cb;
> + return NULL;
> +}
> +
> +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
> + enum tc_setup_type type,
> + void *type_data, bool err_stop) {
> + struct tcf_action_egdev_cb *egdev_cb;
> + int ok_count = 0;
> + int err;
> +
> + list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
> + err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
> + if (err) {
> + if (err_stop)
> + return err;
> + } else {
> + ok_count++;
> + }
> + }
> + return ok_count;
> +}
> +
> +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + struct tcf_action_egdev_cb *egdev_cb;
> +
> + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> + if (WARN_ON(egdev_cb))
> + return -EEXIST;
> + egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
> + if (!egdev_cb)
> + return -ENOMEM;
> + egdev_cb->cb = cb;
> + egdev_cb->cb_priv = cb_priv;
> + list_add(&egdev_cb->list, &egdev->cb_list);
> + return 0;
> +}
> +
> +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + struct tcf_action_egdev_cb *egdev_cb;
> +
> + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> + if (WARN_ON(!egdev_cb))
> + return;
> + list_del(&egdev_cb->list);
> + kfree(egdev_cb);
> +}
> +
> +static int __tc_setup_cb_egdev_register(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv)
> +{
> + struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
> + int err;
> +
> + if (!egdev)
> + return -ENOMEM;
> + err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
> + if (err)
> + goto err_cb_add;
> + return 0;
> +
> +err_cb_add:
> + tcf_action_egdev_put(egdev);
> + return err;
> +}
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + int err;
> +
> + rtnl_lock();
> + err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
> + rtnl_unlock();
> + return err;
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
> +
> +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> + if (WARN_ON(!egdev))
> + return;
> + tcf_action_egdev_cb_del(egdev, cb, cb_priv);
> + tcf_action_egdev_put(egdev);
> +}
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> + tc_setup_cb_t *cb, void *cb_priv) {
> + rtnl_lock();
> + __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
> + rtnl_unlock();
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
> +
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> + enum tc_setup_type type, void *type_data,
> + bool err_stop)
> +{
> + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> + if (!egdev)
> + return 0;
> + return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); }
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
> +
> +static __net_init int tcf_action_net_init(struct net *net) {
> + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> + return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
> }
> +
> +static void __net_exit tcf_action_net_exit(struct net *net) {
> + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> + rhashtable_destroy(&tan->egdev_ht);
> +}
> +
> +static struct pernet_operations tcf_action_net_ops = {
> + .init = tcf_action_net_init,
> + .exit = tcf_action_net_exit,
> + .id = &tcf_action_net_id,
> + .size = sizeof(struct tcf_action_net), };
> +
> static int __init tc_action_init(void)
> {
> + int err;
> +
> + err = register_pernet_subsys(&tcf_action_net_ops);
> + if (err)
> + return err;
> +
> rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
> rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
> rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action,
> tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index
> 450873b..99f9432 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct
> tcf_exts *exts, } EXPORT_SYMBOL(tcf_exts_get_dev);
>
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> + void *type_data, bool err_stop)
> +{
> + int ok_count = 0;
> +#ifdef CONFIG_NET_CLS_ACT
> + const struct tc_action *a;
> + struct net_device *dev;
> + LIST_HEAD(actions);
> + int ret;
> +
> + if (!tcf_exts_has_actions(exts))
> + return 0;
> +
> + tcf_exts_to_list(exts, &actions);
> + list_for_each_entry(a, &actions, list) {
> + if (!a->ops->get_dev)
> + continue;
> + dev = a->ops->get_dev(a);
> + if (!dev || !tc_can_offload(dev))
> + continue;
> + ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
> + if (ret < 0)
> + return ret;
> + ok_count += ret;
> + }
> +#endif
> + return ok_count;
> +}
> +EXPORT_SYMBOL(tcf_exts_egdev_cb_call);
> +
> static int __init tc_filter_init(void)
> {
> rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
> --
> 2.9.5
Hi Jiri, I am just trying to understand this a bit specially in context of offloaded macvlan devices.
I am assuming that any action device [fox example - offloaded macvlan device] can register for tc callback function using " tc_setup_cb_egdev_register()"
It was hard for me to call this above API from offloaded macvlan flow [.ndo_dfwd_add_station] in PF driver,
since it already grabs rtnl_lock(), so I tried to register it from different context in PF driver somehow for the offloaded macvlan device.
Now, I didn't understand actually the usage of egdev callback registration here -
Consider this example -
tc qdisc add dev <pf_device> ingress
tc filter add dev <pf_device> prio 1 protocol ip parent ffff: flower skip_sw dst_ip 192.168.55.55/24 action mirred egress redirect dev mvlan_0
The above filter add command is received by the PF registered callback [which was supplied by tcf_block_cb_register()], the same command is also
received by the callback function which was supplied in tc_setup_cb_egdev_register().
What is the significance of getting two callbacks here ? Since, I can add the above said filter in first callback function even.
What am I supposed to do in second callback ? is there any special motivation for egdev callback registration ?
Thanks,
Manish
Powered by blists - more mailing lists