lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BN3PR0701MB14121D6C7A5365FEFCA8686B892D0@BN3PR0701MB1412.namprd07.prod.outlook.com>
Date:   Sun, 19 Nov 2017 19:37:49 +0000
From:   "Chopra, Manish" <Manish.Chopra@...ium.com>
To:     Jiri Pirko <jiri@...nulli.us>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC:     "davem@...emloft.net" <davem@...emloft.net>,
        "jhs@...atatu.com" <jhs@...atatu.com>,
        "xiyou.wangcong@...il.com" <xiyou.wangcong@...il.com>,
        "saeedm@...lanox.com" <saeedm@...lanox.com>,
        "matanb@...lanox.com" <matanb@...lanox.com>,
        "leonro@...lanox.com" <leonro@...lanox.com>,
        "mlxsw@...lanox.com" <mlxsw@...lanox.com>,
        "David.Laight@...LAB.COM" <David.Laight@...LAB.COM>,
        "gerlitz.or@...il.com" <gerlitz.or@...il.com>
Subject: RE: [patch net-next v2 2/4] net: sched: introduce per-egress action
 device callbacks

> -----Original Message-----
> From: netdev-owner@...r.kernel.org [mailto:netdev-owner@...r.kernel.org]
> On Behalf Of Jiri Pirko
> Sent: Wednesday, October 11, 2017 1:11 PM
> To: netdev@...r.kernel.org
> Cc: davem@...emloft.net; jhs@...atatu.com; xiyou.wangcong@...il.com;
> saeedm@...lanox.com; matanb@...lanox.com; leonro@...lanox.com;
> mlxsw@...lanox.com; David.Laight@...LAB.COM; gerlitz.or@...il.com
> Subject: [patch net-next v2 2/4] net: sched: introduce per-egress action device
> callbacks
> 
> From: Jiri Pirko <jiri@...lanox.com>
> 
> Introduce infrastructure that allows drivers to register callbacks that are called
> whenever tc would offload inserted rule and specified device acts as tc action
> egress device.
> 
> Signed-off-by: Jiri Pirko <jiri@...lanox.com>
> ---
> v1->v2:
> - take rtnl for register/unregister
> ---
>  include/net/act_api.h |  34 ++++++++
>  include/net/pkt_cls.h |   2 +
>  net/sched/act_api.c   | 220
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  net/sched/cls_api.c   |  30 +++++++
>  4 files changed, 286 insertions(+)
> 
> diff --git a/include/net/act_api.h b/include/net/act_api.h index
> 900168a..f5e8c90 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct
> tc_action *a, u64 bytes,  #endif  }
> 
> +typedef int tc_setup_cb_t(enum tc_setup_type type,
> +			  void *type_data, void *cb_priv);
> +
> +#ifdef CONFIG_NET_CLS_ACT
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +			       tc_setup_cb_t *cb, void *cb_priv); void
> +tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +				  tc_setup_cb_t *cb, void *cb_priv); int
> +tc_setup_cb_egdev_call(const struct net_device *dev,
> +			   enum tc_setup_type type, void *type_data,
> +			   bool err_stop);
> +#else
> +static inline
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +			       tc_setup_cb_t *cb, void *cb_priv) {
> +	return 0;
> +}
> +
> +static inline
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +				  tc_setup_cb_t *cb, void *cb_priv) { }
> +
> +static inline
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> +			   enum tc_setup_type type, void *type_data,
> +			   bool err_stop)
> +{
> +	return 0;
> +}
> +#endif
> +
>  #endif
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index
> e80edd8..6f8149c 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts
> *exts);  int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);  int
> tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
>  		     struct net_device **hw_dev);
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> +			   void *type_data, bool err_stop);
> 
>  /**
>   * struct tcf_pkt_info - packet information diff --git a/net/sched/act_api.c
> b/net/sched/act_api.c index da6fa82..ac97db9 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -21,6 +21,8 @@
>  #include <linux/kmod.h>
>  #include <linux/err.h>
>  #include <linux/module.h>
> +#include <linux/rhashtable.h>
> +#include <linux/list.h>
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
>  #include <net/sch_generic.h>
> @@ -1249,8 +1251,226 @@ static int tc_dump_action(struct sk_buff *skb,
> struct netlink_callback *cb)
>  	return skb->len;
>  }
> 
> +struct tcf_action_net {
> +	struct rhashtable egdev_ht;
> +};
> +
> +static unsigned int tcf_action_net_id;
> +
> +struct tcf_action_egdev_cb {
> +	struct list_head list;
> +	tc_setup_cb_t *cb;
> +	void *cb_priv;
> +};
> +
> +struct tcf_action_egdev {
> +	struct rhash_head ht_node;
> +	const struct net_device *dev;
> +	unsigned int refcnt;
> +	struct list_head cb_list;
> +};
> +
> +static const struct rhashtable_params tcf_action_egdev_ht_params = {
> +	.key_offset = offsetof(struct tcf_action_egdev, dev),
> +	.head_offset = offsetof(struct tcf_action_egdev, ht_node),
> +	.key_len = sizeof(const struct net_device *), };
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_lookup(const struct net_device *dev) {
> +	struct net *net = dev_net(dev);
> +	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +	return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
> +				      tcf_action_egdev_ht_params);
> +}
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_get(const struct net_device *dev) {
> +	struct tcf_action_egdev *egdev;
> +	struct tcf_action_net *tan;
> +
> +	egdev = tcf_action_egdev_lookup(dev);
> +	if (egdev)
> +		goto inc_ref;
> +
> +	egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
> +	if (!egdev)
> +		return NULL;
> +	INIT_LIST_HEAD(&egdev->cb_list);
> +	tan = net_generic(dev_net(dev), tcf_action_net_id);
> +	rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
> +			       tcf_action_egdev_ht_params);
> +
> +inc_ref:
> +	egdev->refcnt++;
> +	return egdev;
> +}
> +
> +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) {
> +	struct tcf_action_net *tan;
> +
> +	if (--egdev->refcnt)
> +		return;
> +	tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
> +	rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
> +			       tcf_action_egdev_ht_params);
> +	kfree(egdev);
> +}
> +
> +static struct tcf_action_egdev_cb *
> +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
> +			   tc_setup_cb_t *cb, void *cb_priv) {
> +	struct tcf_action_egdev_cb *egdev_cb;
> +
> +	list_for_each_entry(egdev_cb, &egdev->cb_list, list)
> +		if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
> +			return egdev_cb;
> +	return NULL;
> +}
> +
> +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
> +				    enum tc_setup_type type,
> +				    void *type_data, bool err_stop) {
> +	struct tcf_action_egdev_cb *egdev_cb;
> +	int ok_count = 0;
> +	int err;
> +
> +	list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
> +		err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
> +		if (err) {
> +			if (err_stop)
> +				return err;
> +		} else {
> +			ok_count++;
> +		}
> +	}
> +	return ok_count;
> +}
> +
> +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
> +				   tc_setup_cb_t *cb, void *cb_priv) {
> +	struct tcf_action_egdev_cb *egdev_cb;
> +
> +	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> +	if (WARN_ON(egdev_cb))
> +		return -EEXIST;
> +	egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
> +	if (!egdev_cb)
> +		return -ENOMEM;
> +	egdev_cb->cb = cb;
> +	egdev_cb->cb_priv = cb_priv;
> +	list_add(&egdev_cb->list, &egdev->cb_list);
> +	return 0;
> +}
> +
> +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
> +				    tc_setup_cb_t *cb, void *cb_priv) {
> +	struct tcf_action_egdev_cb *egdev_cb;
> +
> +	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> +	if (WARN_ON(!egdev_cb))
> +		return;
> +	list_del(&egdev_cb->list);
> +	kfree(egdev_cb);
> +}
> +
> +static int __tc_setup_cb_egdev_register(const struct net_device *dev,
> +					tc_setup_cb_t *cb, void *cb_priv)
> +{
> +	struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
> +	int err;
> +
> +	if (!egdev)
> +		return -ENOMEM;
> +	err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
> +	if (err)
> +		goto err_cb_add;
> +	return 0;
> +
> +err_cb_add:
> +	tcf_action_egdev_put(egdev);
> +	return err;
> +}
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +			       tc_setup_cb_t *cb, void *cb_priv) {
> +	int err;
> +
> +	rtnl_lock();
> +	err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
> +	rtnl_unlock();
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
> +
> +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +					   tc_setup_cb_t *cb, void *cb_priv) {
> +	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> +	if (WARN_ON(!egdev))
> +		return;
> +	tcf_action_egdev_cb_del(egdev, cb, cb_priv);
> +	tcf_action_egdev_put(egdev);
> +}
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +				  tc_setup_cb_t *cb, void *cb_priv) {
> +	rtnl_lock();
> +	__tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
> +	rtnl_unlock();
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
> +
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> +			   enum tc_setup_type type, void *type_data,
> +			   bool err_stop)
> +{
> +	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> +	if (!egdev)
> +		return 0;
> +	return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); }
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
> +
> +static __net_init int tcf_action_net_init(struct net *net) {
> +	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +	return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
> }
> +
> +static void __net_exit tcf_action_net_exit(struct net *net) {
> +	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +	rhashtable_destroy(&tan->egdev_ht);
> +}
> +
> +static struct pernet_operations tcf_action_net_ops = {
> +	.init = tcf_action_net_init,
> +	.exit = tcf_action_net_exit,
> +	.id = &tcf_action_net_id,
> +	.size = sizeof(struct tcf_action_net), };
> +
>  static int __init tc_action_init(void)
>  {
> +	int err;
> +
> +	err = register_pernet_subsys(&tcf_action_net_ops);
> +	if (err)
> +		return err;
> +
>  	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
>  	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
>  	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action,
> tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index
> 450873b..99f9432 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct
> tcf_exts *exts,  }  EXPORT_SYMBOL(tcf_exts_get_dev);
> 
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> +			   void *type_data, bool err_stop)
> +{
> +	int ok_count = 0;
> +#ifdef CONFIG_NET_CLS_ACT
> +	const struct tc_action *a;
> +	struct net_device *dev;
> +	LIST_HEAD(actions);
> +	int ret;
> +
> +	if (!tcf_exts_has_actions(exts))
> +		return 0;
> +
> +	tcf_exts_to_list(exts, &actions);
> +	list_for_each_entry(a, &actions, list) {
> +		if (!a->ops->get_dev)
> +			continue;
> +		dev = a->ops->get_dev(a);
> +		if (!dev || !tc_can_offload(dev))
> +			continue;
> +		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
> +		if (ret < 0)
> +			return ret;
> +		ok_count += ret;
> +	}
> +#endif
> +	return ok_count;
> +}
> +EXPORT_SYMBOL(tcf_exts_egdev_cb_call);
> +
>  static int __init tc_filter_init(void)
>  {
>  	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
> --
> 2.9.5

Hi Jiri,  I am just trying to understand this a bit specially in context of offloaded macvlan devices.
I am assuming that any action device [fox example - offloaded macvlan device] can register for tc callback function using " tc_setup_cb_egdev_register()"
It was hard for me to call this above API from offloaded macvlan flow [.ndo_dfwd_add_station] in PF driver,
since it already grabs rtnl_lock(), so I tried to register it from different context in PF driver somehow for the offloaded macvlan device.

Now, I didn't understand actually the usage of egdev callback registration here -
Consider this example -

tc qdisc add dev <pf_device> ingress
tc filter add dev <pf_device> prio 1 protocol ip parent ffff: flower skip_sw dst_ip 192.168.55.55/24 action mirred egress redirect dev mvlan_0

The above filter add command is received by the PF registered callback [which was supplied by tcf_block_cb_register()], the same command is also
received by the callback function which was supplied in tc_setup_cb_egdev_register().

What is the significance of getting two callbacks here ? Since, I can add the above said filter in first callback function even.
What am I supposed to do in second callback ? is there any special motivation for egdev callback registration ?

Thanks,
Manish














Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ