Message-Id: <1540470417-14803-2-git-send-email-john.hurley@netronome.com>
Date:   Thu, 25 Oct 2018 13:26:50 +0100
From:   John Hurley <john.hurley@...ronome.com>
To:     netdev@...r.kernel.org, oss-drivers@...ronome.com,
        jiri@...nulli.us, gerlitz.or@...il.com, ozsh@...lanox.com,
        jakub.kicinski@...ronome.com, simon.horman@...ronome.com,
        avivh@...lanox.com
Cc:     John Hurley <john.hurley@...ronome.com>
Subject: [RFC net-next v2 1/8] net: sched: register callbacks for indirect tc block binds

Currently, drivers can register to receive TC block bind/unbind callbacks
by implementing the setup_tc ndo in any of their given netdevs. However,
drivers may also be interested in binds to higher-level devices (e.g.
tunnel devices) so that they can potentially offload filters applied to
them.

Introduce indirect block devs, which allow drivers to register callbacks
for block binds on other devices. The calling driver is expected to
reference an 'owner' struct that it will pass to all block registrations.
This is used to track the callbacks from a given driver and to free them
if the driver is removed while the upper-level device is still active.
Freeing a callback will also trigger an unbind event (if necessary) to
direct the driver to remove any offloaded rules and unregister any block
filter callbacks.
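
For illustration, the expected driver-side lifecycle might look roughly
like this (a sketch only, not part of this patch; the my_drv_* names and
the priv cookie are hypothetical):

	static struct tcf_indr_block_owner *my_drv_owner;

	/* Indirect bind/unbind handler, sketched further below. */
	static int my_drv_indr_block_cb(struct net_device *netdev,
					void *cb_priv,
					enum tc_setup_type type,
					void *type_data);

	static int my_drv_init(void)
	{
		/* Create the owner that groups all indirect registrations. */
		my_drv_owner = tc_indr_block_owner_create();
		if (!my_drv_owner)
			return -ENOMEM;
		return 0;
	}

	/* e.g. called on NETDEV_REGISTER of a tunnel netdev of interest. */
	static int my_drv_track_netdev(struct net_device *tun_netdev,
				       void *priv)
	{
		/* Takes the rtnl lock internally; may immediately trigger
		 * a bind if the device already has an ingress block.
		 */
		return tc_indr_block_cb_register(tun_netdev, priv,
						 my_drv_indr_block_cb,
						 priv, my_drv_owner);
	}

	static void my_drv_exit(void)
	{
		/* Unbinds and frees any callbacks still registered. */
		tc_indr_block_owner_clean(my_drv_owner);
	}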

Allow registering an indirect block dev callback for a device that is
already bound to a block. In this case (if it is an ingress block),
register and also trigger the callback, meaning that any rules already
installed on the block can be replayed to the calling driver.
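
The indirect callback itself receives the same TC_SETUP_BLOCK events a
setup_tc ndo would see for the block; a minimal hypothetical handler
(again a sketch, not part of this patch) could look like:

	/* Per-filter offload handler (tc_setup_cb_t), body elided. */
	static int my_drv_setup_tc_block_cb(enum tc_setup_type type,
					    void *type_data, void *cb_priv);

	static int my_drv_indr_block_cb(struct net_device *netdev,
					void *cb_priv,
					enum tc_setup_type type,
					void *type_data)
	{
		struct tc_block_offload *bo = type_data;

		if (type != TC_SETUP_BLOCK)
			return -EOPNOTSUPP;

		switch (bo->command) {
		case TC_BLOCK_BIND:
			/* Registering a block cb lets rules already on
			 * the block be replayed to the driver.
			 */
			return tcf_block_cb_register(bo->block,
						     my_drv_setup_tc_block_cb,
						     cb_priv, cb_priv,
						     bo->extack);
		case TC_BLOCK_UNBIND:
			tcf_block_cb_unregister(bo->block,
						my_drv_setup_tc_block_cb,
						cb_priv);
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}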

Signed-off-by: John Hurley <john.hurley@...ronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
---
 include/net/pkt_cls.h     |  56 +++++++++
 include/net/sch_generic.h |   3 +
 net/sched/cls_api.c       | 299 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 357 insertions(+), 1 deletion(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 72ffb31..1b47837 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -37,6 +37,7 @@ struct tcf_block_ext_info {
 };
 
 struct tcf_block_cb;
+struct tcf_indr_block_owner;
 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 
 #ifdef CONFIG_NET_CLS
@@ -81,6 +82,20 @@ void __tcf_block_cb_unregister(struct tcf_block *block,
 			       struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
+int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				tc_indr_block_bind_cb_t *cb, void *cb_ident,
+				struct tcf_indr_block_owner *owner);
+int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+			      tc_indr_block_bind_cb_t *cb, void *cb_ident,
+			      struct tcf_indr_block_owner *owner);
+void __tc_indr_block_cb_unregister(struct net_device *dev,
+				   tc_indr_block_bind_cb_t *cb, void *cb_ident);
+void tc_indr_block_cb_unregister(struct net_device *dev,
+				 tc_indr_block_bind_cb_t *cb,
+				 void *cb_ident);
+
+struct tcf_indr_block_owner *tc_indr_block_owner_create(void);
+void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner);
 
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode);
@@ -183,6 +198,47 @@ void tcf_block_cb_unregister(struct tcf_block *block,
 {
 }
 
+static inline
+int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				tc_indr_block_bind_cb_t *cb,
+				void *cb_ident,
+				struct tcf_indr_block_owner *owner)
+{
+	return 0;
+}
+
+static inline
+int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+			      tc_indr_block_bind_cb_t *cb, void *cb_ident,
+			      struct tcf_indr_block_owner *owner)
+{
+	return 0;
+}
+
+static inline
+void __tc_indr_block_cb_unregister(struct net_device *dev,
+				   tc_indr_block_bind_cb_t *cb,
+				   void *cb_ident)
+{
+}
+
+static inline
+void tc_indr_block_cb_unregister(struct net_device *dev,
+				 tc_indr_block_bind_cb_t *cb,
+				 void *cb_ident)
+{
+}
+
+static inline struct tcf_indr_block_owner *tc_indr_block_owner_create(void)
+{
+	/* NULL would mean an error, only CONFIG_NET_CLS can dereference this */
+	return (void *)1;
+}
+
+static inline void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner)
+{
+}
+
 static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			       struct tcf_result *res, bool compat_mode)
 {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4d73642..8301581 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -24,6 +24,9 @@ struct bpf_flow_keys;
 typedef int tc_setup_cb_t(enum tc_setup_type type,
 			  void *type_data, void *cb_priv);
 
+typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+				    enum tc_setup_type type, void *type_data);
+
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f427a1e..79d97bf 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -25,6 +25,7 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/rhashtable.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -365,6 +366,288 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 	}
 }
 
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+{
+	const struct Qdisc_class_ops *cops;
+	struct Qdisc *qdisc;
+
+	if (!dev_ingress_queue(dev))
+		return NULL;
+
+	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+	if (!qdisc)
+		return NULL;
+
+	cops = qdisc->ops->cl_ops;
+	if (!cops)
+		return NULL;
+
+	if (!cops->tcf_block)
+		return NULL;
+
+	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+}
+
+static struct rhashtable indr_setup_block_ht;
+
+struct tc_indr_block_dev {
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+	struct tcf_block *block;
+};
+
+struct tc_indr_block_cb {
+	struct tc_indr_block_dev *indr_dev;
+	struct list_head list;
+	void *cb_priv;
+	tc_indr_block_bind_cb_t *cb;
+	void *cb_ident;
+	struct list_head owner_list;
+};
+
+struct tcf_indr_block_owner {
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params tc_indr_setup_block_ht_params = {
+	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
+	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
+	.key_len	= sizeof(struct net_device *),
+};
+
+static struct tc_indr_block_dev *
+tc_indr_block_dev_lookup(struct net_device *dev)
+{
+	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+				      tc_indr_setup_block_ht_params);
+}
+
+static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
+{
+	struct tc_indr_block_dev *indr_dev;
+
+	indr_dev = tc_indr_block_dev_lookup(dev);
+	if (indr_dev)
+		goto inc_ref;
+
+	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&indr_dev->cb_list);
+	indr_dev->dev = dev;
+	indr_dev->block = tc_dev_ingress_block(dev);
+	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+				   tc_indr_setup_block_ht_params)) {
+		kfree(indr_dev);
+		return NULL;
+	}
+
+inc_ref:
+	indr_dev->refcnt++;
+	return indr_dev;
+}
+
+static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
+{
+	if (--indr_dev->refcnt)
+		return;
+
+	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+			       tc_indr_setup_block_ht_params);
+	kfree(indr_dev);
+}
+
+static struct tc_indr_block_cb *
+tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
+			tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		if (indr_block_cb->cb == cb &&
+		    indr_block_cb->cb_ident == cb_ident)
+			return indr_block_cb;
+	return NULL;
+}
+
+static struct tc_indr_block_cb *
+tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev,
+		     void *cb_priv, tc_indr_block_bind_cb_t *cb,
+		     void *cb_ident, struct tcf_indr_block_owner *owner)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+
+	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (indr_block_cb)
+		return ERR_PTR(-EEXIST);
+
+	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+	if (!indr_block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	indr_block_cb->indr_dev = indr_dev;
+	indr_block_cb->cb_priv = cb_priv;
+	indr_block_cb->cb = cb;
+	indr_block_cb->cb_ident = cb_ident;
+	list_add(&indr_block_cb->list, &indr_dev->cb_list);
+	list_add(&indr_block_cb->owner_list, &owner->cb_list);
+
+	return indr_block_cb;
+}
+
+static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
+{
+	list_del(&indr_block_cb->list);
+	list_del(&indr_block_cb->owner_list);
+	kfree(indr_block_cb);
+}
+
+static int tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
+				 struct tc_indr_block_cb *indr_block_cb,
+				 enum tc_block_command command)
+{
+	struct tc_block_offload bo = {
+		.command	= command,
+		.binder_type	= TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+		.block		= indr_dev->block,
+	};
+
+	if (!indr_dev->block)
+		return 0;
+	return indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv,
+				 TC_SETUP_BLOCK, &bo);
+}
+
+int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				tc_indr_block_bind_cb_t *cb, void *cb_ident,
+				struct tcf_indr_block_owner *owner)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+	struct tc_indr_block_dev *indr_dev;
+	int err;
+
+	indr_dev = tc_indr_block_dev_get(dev);
+	if (!indr_dev)
+		return -ENOMEM;
+
+	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident,
+					     owner);
+	err = PTR_ERR_OR_ZERO(indr_block_cb);
+	if (err)
+		goto err_dev_put;
+
+	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
+	return 0;
+
+err_dev_put:
+	tc_indr_block_dev_put(indr_dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
+
+int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+			      tc_indr_block_bind_cb_t *cb, void *cb_ident,
+			      struct tcf_indr_block_owner *owner)
+{
+	int err;
+
+	rtnl_lock();
+	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident, owner);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+
+void __tc_indr_block_cb_unregister(struct net_device *dev,
+				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+	struct tc_indr_block_dev *indr_dev;
+
+	indr_dev = tc_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (!indr_block_cb)
+		return;
+
+	/* Send unbind message if required to free any block cbs. */
+	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
+	tc_indr_block_cb_del(indr_block_cb);
+	tc_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+
+void tc_indr_block_cb_unregister(struct net_device *dev,
+				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	rtnl_lock();
+	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
+
+struct tcf_indr_block_owner *tc_indr_block_owner_create(void)
+{
+	struct tcf_indr_block_owner *owner;
+
+	owner = kzalloc(sizeof(*owner), GFP_KERNEL);
+	if (!owner)
+		return NULL;
+	INIT_LIST_HEAD(&owner->cb_list);
+	return owner;
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_owner_create);
+
+void tc_indr_block_owner_clean(struct tcf_indr_block_owner *owner)
+{
+	struct tc_indr_block_cb *indr_block_cb, *store;
+	struct tc_indr_block_dev *indr_dev;
+
+	rtnl_lock();
+	list_for_each_entry_safe(indr_block_cb, store, &owner->cb_list,
+				 owner_list) {
+		indr_dev = indr_block_cb->indr_dev;
+		tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
+		tc_indr_block_cb_del(indr_block_cb);
+		tc_indr_block_dev_put(indr_dev);
+	}
+	rtnl_unlock();
+
+	kfree(owner);
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_owner_clean);
+
+static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+			       struct tcf_block_ext_info *ei,
+			       enum tc_block_command command,
+			       struct netlink_ext_ack *extack)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+	struct tc_indr_block_dev *indr_dev;
+	struct tc_block_offload bo = {
+		.command	= command,
+		.binder_type	= ei->binder_type,
+		.block		= block,
+		.extack		= extack,
+	};
+
+	indr_dev = tc_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  &bo);
+}
+
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
 	return block->offloadcnt;
@@ -406,12 +689,17 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
-	return err;
+	if (err)
+		return err;
+
+	tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
+	return 0;
 
 no_offload_dev_inc:
 	if (tcf_block_offload_in_use(block))
 		return -EOPNOTSUPP;
 	block->nooffloaddevcnt++;
+	tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
 	return 0;
 }
 
@@ -421,6 +709,8 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
+	tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);
+
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_dec;
 	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
@@ -2355,6 +2645,11 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
+	err = rhashtable_init(&indr_setup_block_ht,
+			      &tc_indr_setup_block_ht_params);
+	if (err)
+		goto err_rhash_setup_block_ht;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
@@ -2366,6 +2661,8 @@ static int __init tc_filter_init(void)
 
 	return 0;
 
+err_rhash_setup_block_ht:
+	unregister_pernet_subsys(&tcf_net_ops);
 err_register_pernet_subsys:
 	destroy_workqueue(tc_filter_wq);
 	return err;
-- 
2.7.4
