lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 16 Jul 2018 19:37:19 -0700
From:   Jakub Kicinski <jakub.kicinski@...ronome.com>
To:     alexei.starovoitov@...il.com, daniel@...earbox.net
Cc:     oss-drivers@...ronome.com, netdev@...r.kernel.org,
        Jakub Kicinski <jakub.kicinski@...ronome.com>
Subject: [PATCH bpf-next 5/9] bpf: offload: aggregate offloads per-device

Currently we have two lists of offloaded objects - programs and maps.
Netdevice unregister notifier scans those lists to orphan objects
associated with device being unregistered.  This puts unnecessary
(even if negligible) burden on all netdev unregister calls in BPF-
-enabled kernel.  The lists of objects may potentially get long
making the linear scan even more problematic.  There haven't been
complaints about this mechanisms so far, but it is suboptimal.

Instead of relying on notifiers, make the few BPF-capable drivers
register explicitly for BPF offloads.  The programs and maps will
now be collected per-device not on a global list, and only scanned
for removal when driver unregisters from BPF offloads.

Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c |  13 ++
 drivers/net/netdevsim/bpf.c                   |   7 +
 include/linux/bpf.h                           |   2 +
 kernel/bpf/offload.c                          | 142 ++++++++++++------
 4 files changed, 118 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index b95b94d008cf..dee039ada75c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -404,6 +404,16 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 	return -EINVAL;
 }
 
+static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
+{
+	return bpf_offload_dev_netdev_register(netdev);
+}
+
+static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
+{
+	bpf_offload_dev_netdev_unregister(netdev);
+}
+
 static int nfp_bpf_init(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf;
@@ -466,6 +476,9 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
+	.ndo_init	= nfp_bpf_ndo_init,
+	.ndo_uninit	= nfp_bpf_ndo_uninit,
+
 	.vnic_alloc	= nfp_bpf_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 357f9e62f306..c4a2829e0e1f 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -582,6 +582,8 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
+	int err;
+
 	if (ns->sdev->refcnt == 1) {
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
@@ -592,6 +594,10 @@ int nsim_bpf_init(struct netdevsim *ns)
 			return -ENOMEM;
 	}
 
+	err = bpf_offload_dev_netdev_register(ns->netdev);
+	if (err)
+		return err;
+
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
 
@@ -625,6 +631,7 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
+	bpf_offload_dev_netdev_unregister(ns->netdev);
 
 	if (ns->sdev->refcnt == 1) {
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8827e797ff97..21c001c3285c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -648,6 +648,8 @@ int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
 int bpf_map_offload_get_next_key(struct bpf_map *map,
 				 void *key, void *next_key);
 
+int bpf_offload_dev_netdev_register(struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev);
 bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ac747d5cf7c6..b914f94c53d4 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -18,19 +18,37 @@
 #include <linux/bug.h>
 #include <linux/kdev_t.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/proc_ns.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
-/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+/* Protects offdevs, members of bpf_offload_netdev and offload members
  * of all progs.
  * RTNL lock cannot be taken when holding this lock.
  */
 static DECLARE_RWSEM(bpf_devs_lock);
-static LIST_HEAD(bpf_prog_offload_devs);
-static LIST_HEAD(bpf_map_offload_devs);
+
+struct bpf_offload_netdev {
+	struct rhash_head l;
+	struct net_device *netdev;
+	struct list_head progs;
+	struct list_head maps;
+};
+
+static const struct rhashtable_params offdevs_params = {
+	.nelem_hint		= 4,
+	.key_len		= sizeof(struct net_device *),
+	.key_offset		= offsetof(struct bpf_offload_netdev, netdev),
+	.head_offset		= offsetof(struct bpf_offload_netdev, l),
+	.automatic_shrinking	= true,
+};
+
+static struct rhashtable offdevs;
+static bool offdevs_inited;
 
 static int bpf_dev_offload_check(struct net_device *netdev)
 {
@@ -41,8 +59,19 @@ static int bpf_dev_offload_check(struct net_device *netdev)
 	return 0;
 }
 
+static struct bpf_offload_netdev *
+bpf_offload_find_netdev(struct net_device *netdev)
+{
+	lockdep_assert_held(&bpf_devs_lock);
+
+	if (!offdevs_inited)
+		return NULL;
+	return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+}
+
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 {
+	struct bpf_offload_netdev *ondev;
 	struct bpf_prog_offload *offload;
 	int err;
 
@@ -66,12 +95,13 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		goto err_maybe_put;
 
 	down_write(&bpf_devs_lock);
-	if (offload->netdev->reg_state != NETREG_REGISTERED) {
+	ondev = bpf_offload_find_netdev(offload->netdev);
+	if (!ondev) {
 		err = -EINVAL;
 		goto err_unlock;
 	}
 	prog->aux->offload = offload;
-	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
+	list_add_tail(&offload->offloads, &ondev->progs);
 	dev_put(offload->netdev);
 	up_write(&bpf_devs_lock);
 
@@ -294,6 +324,7 @@ static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 {
 	struct net *net = current->nsproxy->net_ns;
+	struct bpf_offload_netdev *ondev;
 	struct bpf_offloaded_map *offmap;
 	int err;
 
@@ -316,11 +347,17 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto err_unlock;
 
+	ondev = bpf_offload_find_netdev(offmap->netdev);
+	if (!ondev) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
 	err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
 	if (err)
 		goto err_unlock;
 
-	list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+	list_add_tail(&offmap->offloads, &ondev->maps);
 	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
@@ -489,56 +526,69 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
 	return ret;
 }
 
-static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct net_device *netdev)
 {
-	struct bpf_prog_offload *offload, *tmp;
+	struct bpf_offload_netdev *ondev;
+	int err;
 
-	list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
-		if (offload->netdev == netdev)
-			__bpf_prog_offload_destroy(offload->prog);
-}
+	down_write(&bpf_devs_lock);
+	if (!offdevs_inited) {
+		err = rhashtable_init(&offdevs, &offdevs_params);
+		if (err)
+			return err;
+		offdevs_inited = true;
+	}
+	up_write(&bpf_devs_lock);
 
-static void bpf_offload_orphan_all_maps(struct net_device *netdev)
-{
-	struct bpf_offloaded_map *offmap, *tmp;
+	ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+	if (!ondev)
+		return -ENOMEM;
+
+	ondev->netdev = netdev;
+	INIT_LIST_HEAD(&ondev->progs);
+	INIT_LIST_HEAD(&ondev->maps);
+
+	down_write(&bpf_devs_lock);
+	err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+	if (err) {
+		netdev_warn(netdev, "failed to register for BPF offload\n");
+		goto err_unlock_free;
+	}
 
-	list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
-		if (offmap->netdev == netdev)
-			__bpf_map_offload_destroy(offmap);
+	up_write(&bpf_devs_lock);
+	return 0;
+
+err_unlock_free:
+	up_write(&bpf_devs_lock);
+	kfree(ondev);
+	return err;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-static int bpf_offload_notification(struct notifier_block *notifier,
-				    ulong event, void *ptr)
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
 {
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct bpf_offloaded_map *offmap, *mtmp;
+	struct bpf_prog_offload *offload, *ptmp;
+	struct bpf_offload_netdev *ondev;
 
 	ASSERT_RTNL();
 
-	switch (event) {
-	case NETDEV_UNREGISTER:
-		/* ignore namespace changes */
-		if (netdev->reg_state != NETREG_UNREGISTERING)
-			break;
-
-		down_write(&bpf_devs_lock);
-		bpf_offload_orphan_all_progs(netdev);
-		bpf_offload_orphan_all_maps(netdev);
-		up_write(&bpf_devs_lock);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
+	down_write(&bpf_devs_lock);
+	ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+	if (WARN_ON(!ondev))
+		goto unlock;
 
-static struct notifier_block bpf_offload_notifier = {
-	.notifier_call = bpf_offload_notification,
-};
+	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
 
-static int __init bpf_offload_init(void)
-{
-	register_netdevice_notifier(&bpf_offload_notifier);
-	return 0;
-}
+	list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+		__bpf_prog_offload_destroy(offload->prog);
+	list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+		__bpf_map_offload_destroy(offmap);
 
-subsys_initcall(bpf_offload_init);
+	WARN_ON(!list_empty(&ondev->progs));
+	WARN_ON(!list_empty(&ondev->maps));
+	kfree(ondev);
+unlock:
+	up_write(&bpf_devs_lock);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
-- 
2.17.1

Powered by blists - more mailing lists