Date:   Mon, 08 Apr 2019 19:05:56 +0200
From:   Toke Høiland-Jørgensen <toke@...hat.com>
To:     David Miller <davem@...emloft.net>
Cc:     netdev@...r.kernel.org, Jesper Dangaard Brouer <brouer@...hat.com>,
        Daniel Borkmann <daniel@...earbox.net>,
        Alexei Starovoitov <ast@...nel.org>,
        Jakub Kicinski <jakub.kicinski@...ronome.com>,
        Björn Töpel <bjorn.topel@...il.com>
Subject: [PATCH net-next v4 4/6] xdp: Always use a devmap for XDP_REDIRECT
 to a device

An XDP program can redirect packets between interfaces using either the
xdp_redirect() helper or the xdp_redirect_map() helper. Apart from the
flexibility of updating maps from userspace, the redirect_map() helper also
uses the map structure to batch packets, which results in a significant
(around 50%) performance boost. However, the xdp_redirect() API is simpler
if one just wants to redirect to another interface, which means people tend
to use this interface and then wonder why they get worse performance
than expected.
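
For reference (not part of this patch), the two helpers are used roughly
like this on the BPF side, in the style of samples/bpf/xdp_redirect*_kern.c;
the map name, ifindex and key values below are made up for illustration:

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 64,
};

SEC("xdp_redirect")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	/* non-map variant: destination ifindex passed directly
	 * (hypothetical ifindex 4); before this patch it missed
	 * the devmap batching
	 */
	return bpf_redirect(4, 0);
}

SEC("xdp_redirect_map")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	/* map variant: index into a devmap kept up to date from
	 * userspace (hypothetical key 0)
	 */
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";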

This patch seeks to close this performance difference between the two APIs.
It achieves this by changing xdp_redirect() to use a hidden devmap for
looking up destination interfaces, thus gaining the batching benefit with
no visible difference from the user API point of view.

A hidden per-namespace map is allocated when an XDP program that uses the
non-map xdp_redirect() helper is first loaded. This map is populated with
all available interfaces in its namespace, and kept up to date as
interfaces come and go. Once allocated, the map is kept around until the
namespace is removed.

The hidden map uses the ifindex as its map key, which means it is limited
to ifindexes smaller than the map size of 64. A later patch introduces a new
map type to lift this restriction.

Performance numbers:

Before patch:
xdp_redirect:     5426035 pkt/s
xdp_redirect_map: 8412754 pkt/s

After patch:
xdp_redirect:     8314702 pkt/s
xdp_redirect_map: 8411854 pkt/s

This corresponds to a 53% increase in xdp_redirect performance, or a
reduction in per-packet processing time by 64 nanoseconds.
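(Per packet, this works out to 1/5426035 pkt/s ≈ 184 ns before and
1/8314702 pkt/s ≈ 120 ns after the patch, hence the 64 ns difference.)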

Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
 include/linux/bpf.h         |   35 ++++++
 include/linux/filter.h      |    2 
 include/net/net_namespace.h |    2 
 include/net/netns/xdp.h     |   11 ++
 kernel/bpf/devmap.c         |  264 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c        |    3 
 kernel/bpf/verifier.c       |   13 ++
 net/core/dev.c              |   65 ++++++++++-
 net/core/filter.c           |   58 ---------
 9 files changed, 395 insertions(+), 58 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f62897198844..c73ff0ea1bf4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -26,6 +26,7 @@ struct sock;
 struct seq_file;
 struct btf;
 struct btf_type;
+struct net;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
@@ -541,6 +542,7 @@ extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
 extern const struct bpf_verifier_ops xdp_analyzer_ops;
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_get_by_id(u32 id);
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 				       bool attach_drv);
 struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
@@ -621,6 +623,11 @@ struct xdp_buff;
 struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct bpf_map *__dev_map_get_default_map(struct net_device *dev);
+int dev_map_ensure_default_map(struct net *net);
+void dev_map_put_default_map(struct net *net);
+int dev_map_inc_redirect_use_count(void);
+void dev_map_dec_redirect_use_count(void);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
@@ -650,6 +657,11 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct bpf_prog *bpf_prog_get_by_id(u32 id)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd,
 						     enum bpf_prog_type type,
 						     bool attach_drv)
@@ -702,6 +714,29 @@ static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
+static inline struct bpf_map *__dev_map_get_default_map(struct net_device *dev)
+{
+	return NULL;
+}
+
+static inline int dev_map_ensure_default_map(struct net *net)
+{
+	return 0;
+}
+
+static inline void dev_map_put_default_map(struct net *net)
+{
+}
+
+static inline int dev_map_inc_redirect_use_count(void)
+{
+	return 0;
+}
+
+static inline void dev_map_dec_redirect_use_count(void)
+{
+}
+
 static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
 {
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6074aa064b54..df6dbf86daf6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -507,6 +507,8 @@ struct bpf_prog {
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
+				redirect_needed:1,	/* Does program need access to xdp_redirect? */
+				redirect_used:1,	/* Does program use xdp_redirect? */
 				blinded:1,	/* Was blinded */
 				is_func:1,	/* program is a bpf function */
 				kprobe_override:1, /* Do we override a kprobe? */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a68ced28d8f4..6706ecc25d8f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -162,7 +162,7 @@ struct net {
 #if IS_ENABLED(CONFIG_CAN)
 	struct netns_can	can;
 #endif
-#ifdef CONFIG_XDP_SOCKETS
+#ifdef CONFIG_BPF_SYSCALL
 	struct netns_xdp	xdp;
 #endif
 	struct sock		*diag_nlsk;
diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h
index e5734261ba0a..4d0ac1606175 100644
--- a/include/net/netns/xdp.h
+++ b/include/net/netns/xdp.h
@@ -4,10 +4,21 @@
 
 #include <linux/rculist.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>
+
+struct bpf_dtab;
+
+struct bpf_dtab_container {
+	struct bpf_dtab __rcu *dtab;
+	atomic_t use_cnt;
+};
 
 struct netns_xdp {
+#ifdef CONFIG_XDP_SOCKETS
 	struct mutex		lock;
 	struct hlist_head	list;
+#endif
+	struct bpf_dtab_container default_map;
 };
 
 #endif /* __NETNS_XDP_H__ */
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 92393b283b87..5f0b517bde21 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -52,11 +52,16 @@
 #include <net/xdp.h>
 #include <linux/filter.h>
 #include <trace/events/xdp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 #define DEV_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
 #define DEV_MAP_BULK_SIZE 16
+#define DEV_MAP_DEFAULT_SIZE 8
+#define DEV_MAP_MAX_USE_CNT 32768
+
 struct xdp_bulk_queue {
 	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
 	struct net_device *dev_rx;
@@ -81,6 +86,7 @@ struct bpf_dtab {
 
 static DEFINE_MUTEX(dev_map_mtx);
 static LIST_HEAD(dev_map_list);
+static atomic_t global_redirect_use_cnt = ATOMIC_INIT(0);
 
 static struct workqueue_struct *dev_map_wq;
 static void __dev_map_free(struct work_struct *work);
@@ -340,6 +346,19 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 	return obj;
 }
 
+/* This is only being called from xdp_do_redirect() if the xdp_redirect helper
+ * is used; the default map is allocated on XDP program load if the helper is
+ * used, so will always be available at this point.
+ */
+struct bpf_map *__dev_map_get_default_map(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct bpf_dtab *dtab;
+
+        dtab = rcu_dereference(net->xdp.default_map.dtab);
+	return &dtab->map;
+}
+
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
@@ -541,14 +560,212 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 };
 
+static inline struct net *bpf_default_map_to_net(struct bpf_dtab_container *cont)
+{
+	struct netns_xdp *xdp = container_of(cont, struct netns_xdp, default_map);
+
+	return container_of(xdp, struct net, xdp);
+}
+
+static void __dev_map_release_default_map(struct bpf_dtab_container *cont)
+{
+	struct bpf_dtab *dtab = NULL;
+
+	lockdep_assert_held(&dev_map_mtx);
+
+	dtab = rcu_dereference(cont->dtab);
+	if (dtab) {
+		list_del_rcu(&dtab->list);
+		rcu_assign_pointer(cont->dtab, NULL);
+		bpf_clear_redirect_map(&dtab->map);
+		queue_work(dev_map_wq, &dtab->free_work);
+	}
+}
+
+void dev_map_put_default_map(struct net *net)
+{
+        mutex_lock(&dev_map_mtx);
+	if (atomic_dec_and_test(&net->xdp.default_map.use_cnt)) {
+		__dev_map_release_default_map(&net->xdp.default_map);
+	}
+        mutex_unlock(&dev_map_mtx);
+}
+
+static int __init_default_map(struct bpf_dtab_container *cont)
+{
+	struct net *net = bpf_default_map_to_net(cont);
+	struct bpf_dtab *dtab, *old_dtab;
+	int size = DEV_MAP_DEFAULT_SIZE;
+	struct net_device *netdev;
+	union bpf_attr attr = {};
+	u32 idx;
+	int err;
+
+	lockdep_assert_held(&dev_map_mtx);
+
+	if (!atomic_read(&global_redirect_use_cnt))
+		return 0;
+
+	for_each_netdev(net, netdev)
+		if (netdev->ifindex >= size)
+			size <<= 1;
+
+	old_dtab = rcu_dereference(cont->dtab);
+	if (old_dtab && old_dtab->map.max_entries == size)
+		return 0;
+
+	dtab = kzalloc(sizeof(*dtab), GFP_USER);
+	if (!dtab)
+		return -ENOMEM;
+
+	attr.map_type = BPF_MAP_TYPE_DEVMAP;
+	attr.max_entries = size;
+	attr.value_size = 4;
+	attr.key_size = 4;
+
+	err = dev_map_init_map(dtab, &attr, false);
+	if (err) {
+		kfree(dtab);
+		return err;
+	}
+
+	for_each_netdev(net, netdev) {
+		idx = netdev->ifindex;
+		err = __dev_map_update_elem(net, &dtab->map, &idx, &idx, 0);
+		if (err) {
+			queue_work(dev_map_wq, &dtab->free_work);
+			return err;
+		}
+	}
+
+	rcu_assign_pointer(cont->dtab, dtab);
+	list_add_tail_rcu(&dtab->list, &dev_map_list);
+
+	if (old_dtab) {
+		list_del_rcu(&old_dtab->list);
+		bpf_clear_redirect_map(&old_dtab->map);
+		queue_work(dev_map_wq, &old_dtab->free_work);
+	}
+
+	return 0;
+}
+
+static int maybe_inc_use_cnt(atomic_t *v)
+{
+	int use_cnt;
+
+	use_cnt = atomic_inc_return(v);
+	if (use_cnt > DEV_MAP_MAX_USE_CNT) {
+		atomic_dec(v);
+		return -EBUSY;
+	}
+
+	return use_cnt;
+}
+
+int dev_map_ensure_default_map(struct net *net)
+{
+	int use_cnt, err = 0;
+
+        mutex_lock(&dev_map_mtx);
+	use_cnt = maybe_inc_use_cnt(&net->xdp.default_map.use_cnt);
+	if (use_cnt < 0) {
+                err = use_cnt;
+                goto out;
+        }
+
+	if (use_cnt == 1)
+		err = __init_default_map(&net->xdp.default_map);
+
+out:
+        mutex_unlock(&dev_map_mtx);
+	return err;
+}
+
+static void __dev_map_dec_redirect_count(void)
+{
+	struct net *net;
+
+	lockdep_assert_held(&dev_map_mtx);
+
+	if (atomic_dec_and_test(&global_redirect_use_cnt))
+		for_each_net_rcu(net)
+			__dev_map_release_default_map(&net->xdp.default_map);
+}
+
+void dev_map_dec_redirect_use_count(void)
+{
+	mutex_lock(&dev_map_mtx);
+	__dev_map_dec_redirect_count();
+	mutex_unlock(&dev_map_mtx);
+}
+
+static int __dev_map_init_redirect_use(void)
+{
+	struct net *net;
+	int err;
+
+	lockdep_assert_held(&dev_map_mtx);
+
+	for_each_net_rcu(net) {
+		if (atomic_read(&net->xdp.default_map.use_cnt)) {
+			err = __init_default_map(&net->xdp.default_map);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+int dev_map_inc_redirect_use_count(void)
+{
+	int use_cnt, err = 0;
+
+	mutex_lock(&dev_map_mtx);
+	use_cnt = maybe_inc_use_cnt(&global_redirect_use_cnt);
+	if (use_cnt < 0) {
+		err = use_cnt;
+		goto out;
+	}
+
+	if (use_cnt == 1)
+		err = __dev_map_init_redirect_use();
+
+	if (err)
+		__dev_map_dec_redirect_count();
+
+ out:
+	mutex_unlock(&dev_map_mtx);
+	return err;
+}
+
 static int dev_map_notification(struct notifier_block *notifier,
 				ulong event, void *ptr)
 {
 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(netdev);
+	u32 idx = netdev->ifindex;
 	struct bpf_dtab *dtab;
-	int i;
+	int i, err;
 
 	switch (event) {
+	case NETDEV_REGISTER:
+		rcu_read_lock();
+		dtab = rcu_dereference(net->xdp.default_map.dtab);
+		if (dtab) {
+			err = __dev_map_update_elem(net, &dtab->map,
+						    &idx, &idx, 0);
+			if (err == -E2BIG) {
+				mutex_lock(&dev_map_mtx);
+				err = __init_default_map(&net->xdp.default_map);
+				if (err)
+					net_warn_ratelimited("Unable to re-allocate default map, xdp_redirect() may fail on some ifindexes\n");
+				mutex_unlock(&dev_map_mtx);
+			}
+		}
+		rcu_read_unlock();
+		break;
 	case NETDEV_UNREGISTER:
 		/* This rcu_read_lock/unlock pair is needed because
 		 * dev_map_list is an RCU list AND to ensure a delete
@@ -581,8 +798,46 @@ static struct notifier_block dev_map_notifier = {
 	.notifier_call = dev_map_notification,
 };
 
+#ifdef CONFIG_PROC_FS
+static int dev_map_default_show(struct seq_file *seq, void *v)
+{
+	struct net *net = (struct net *)seq->private;
+	struct bpf_dtab *dtab;
+
+        dtab = rcu_dereference(net->xdp.default_map.dtab);
+	seq_printf(seq, "%d %d\n",
+                   atomic_read(&net->xdp.default_map.use_cnt),
+                   dtab ? 1 : 0);
+	return 0;
+}
+#endif	/* CONFIG_PROC_FS */
+
+static int __net_init dev_map_net_init(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	proc_create_net_single("default_dev_map", 0444, net->proc_net,
+                               dev_map_default_show, NULL);
+#endif
+	return 0;
+}
+
+static void __net_exit dev_map_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("default_dev_map", net->proc_net);
+#endif
+}
+
+
+static struct pernet_operations dev_map_net_ops = {
+	.init = dev_map_net_init,
+	.exit = dev_map_net_exit,
+};
+
 static int __init dev_map_init(void)
 {
+        int ret;
+
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
@@ -591,8 +846,15 @@ static int __init dev_map_init(void)
 	if (!dev_map_wq)
 		return -ENOMEM;
 
+	ret = register_pernet_subsys(&dev_map_net_ops);
+	if (ret) {
+                destroy_workqueue(dev_map_wq);
+                return ret;
+        }
+
 	register_netdevice_notifier(&dev_map_notifier);
 	return 0;
 }
 
+
 subsys_initcall(dev_map_init);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index afca36f53c49..2b1e691b9d7e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1274,6 +1274,9 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 		kvfree(prog->aux->func_info);
 		bpf_prog_free_linfo(prog);
 
+		if (prog->redirect_used)
+			dev_map_dec_redirect_use_count();
+
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2fe89138309a..7f2c01911134 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7646,6 +7646,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			prog->dst_needed = 1;
 		if (insn->imm == BPF_FUNC_get_prandom_u32)
 			bpf_user_rnd_init_once();
+		if (insn->imm == BPF_FUNC_redirect) {
+			prog->redirect_needed = 1;
+			if (!prog->redirect_used) {
+				int err;
+
+                                err = dev_map_inc_redirect_use_count();
+				if (err)
+					return err;
+				prog->redirect_used = 1;
+			}
+		}
+
 		if (insn->imm == BPF_FUNC_override_return)
 			prog->kprobe_override = 1;
 		if (insn->imm == BPF_FUNC_tail_call) {
@@ -7655,6 +7667,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 * the program array.
 			 */
 			prog->cb_access = 1;
+			prog->redirect_needed = 1;
 			env->prog->aux->stack_depth = MAX_BPF_STACK;
 			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index feafc3580350..3930777a5c6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7992,6 +7992,23 @@ u32 __dev_xdp_query(struct net_device *dev, unsigned int target)
 	return 0;
 }
 
+static struct bpf_prog *dev_xdp_get_prog(struct net_device *dev,
+                                         unsigned int target)
+{
+        struct bpf_prog *prog;
+
+        if (WARN_ON(!(target == XDP_FLAGS_DRV_MODE ||
+                      target == XDP_FLAGS_SKB_MODE))
+            || target != dev->xdp_target)
+                return NULL;
+
+        prog = rtnl_dereference(dev->xdp_prog);
+        if (prog)
+                prog = bpf_prog_inc_not_zero(prog);
+
+        return prog;
+}
+
 static int dev_xdp_install(struct net_device *dev, unsigned int target,
 			   struct netlink_ext_ack *extack, u32 flags,
 			   struct bpf_prog *prog)
@@ -8045,7 +8062,8 @@ static int dev_xdp_install(struct net_device *dev, unsigned int target,
 
 static void dev_xdp_uninstall(struct net_device *dev)
 {
-	struct netdev_bpf xdp;
+	struct bpf_prog *prog;
+        struct netdev_bpf xdp;
 	bpf_op_t ndo_bpf;
 
 	/* Remove generic/native XDP */
@@ -8056,6 +8074,14 @@ static void dev_xdp_uninstall(struct net_device *dev)
 	if (!ndo_bpf)
 		return;
 
+        prog = dev_xdp_get_prog(dev, XDP_FLAGS_DRV_MODE);
+        if (prog) {
+                if (prog->redirect_needed)
+                        dev_map_put_default_map(dev_net(dev));
+                bpf_prog_put(prog);
+        }
+
+	/* Remove HW offload */
 	memset(&xdp, 0, sizeof(xdp));
 	xdp.command = XDP_QUERY_PROG_HW;
 	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
@@ -8087,6 +8113,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	const struct net_device_ops *ops = dev->netdev_ops;
 	bool offload, drv = !!ops->ndo_bpf;
 	struct bpf_prog *prog = NULL;
+        int default_map_needed = 0;
 	unsigned int target;
 	int err;
 
@@ -8107,6 +8134,16 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		return -EEXIST;
 	}
 
+        if (target == XDP_FLAGS_DRV_MODE) {
+		struct bpf_prog *old_prog = dev_xdp_get_prog(dev, target);
+
+		if (old_prog) {
+			if (old_prog->redirect_needed)
+                                default_map_needed--;
+			bpf_prog_put(old_prog);
+		}
+	}
+
 	if (fd >= 0) {
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
 		    __dev_xdp_query(dev, target)) {
@@ -8123,11 +8160,23 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			bpf_prog_put(prog);
 			return -EINVAL;
 		}
+
+		if (target == XDP_FLAGS_DRV_MODE && prog->redirect_needed &&
+                    ++default_map_needed > 0) {
+			err = dev_map_ensure_default_map(dev_net(dev));
+			if (err) {
+				NL_SET_ERR_MSG(extack, "unable to allocate default map for xdp_redirect()");
+				return err;
+			}
+		}
 	}
 
 	err = dev_xdp_install(dev, target, extack, flags, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
+        else if (!err && default_map_needed < 0)
+                dev_map_put_default_map(dev_net(dev));
+
 
 	return err;
 }
@@ -9365,6 +9414,7 @@ EXPORT_SYMBOL(unregister_netdev);
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
 	int err, new_nsid, new_ifindex;
+	struct bpf_prog *prog = NULL;
 
 	ASSERT_RTNL();
 
@@ -9382,6 +9432,13 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	if (net_eq(dev_net(dev), net))
 		goto out;
 
+	prog = dev_xdp_get_prog(dev, XDP_FLAGS_DRV_MODE);
+	if (prog && prog->redirect_needed) {
+                err = dev_map_ensure_default_map(net);
+		if (err)
+			goto out;
+	}
+
 	/* Pick the destination device name, and ensure
 	 * we can use it in the destination network namespace.
 	 */
@@ -9420,6 +9477,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 
+	if (prog && prog->redirect_needed)
+		dev_map_put_default_map(dev_net(dev));
+
 	new_nsid = peernet2id_alloc(dev_net(dev), net);
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex))
@@ -9467,6 +9527,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	synchronize_net();
 	err = 0;
 out:
+	if (prog)
+		bpf_prog_put(prog);
+
 	return err;
 }
 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
diff --git a/net/core/filter.c b/net/core/filter.c
index cdaafa3322db..c062f18f1492 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3410,58 +3410,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static int __bpf_tx_xdp(struct net_device *dev,
-			struct bpf_map *map,
-			struct xdp_buff *xdp,
-			u32 index)
-{
-	struct xdp_frame *xdpf;
-	int err, sent;
-
-	if (!dev->netdev_ops->ndo_xdp_xmit) {
-		return -EOPNOTSUPP;
-	}
-
-	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
-	if (unlikely(err))
-		return err;
-
-	xdpf = convert_to_xdp_frame(xdp);
-	if (unlikely(!xdpf))
-		return -EOVERFLOW;
-
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
-	if (sent <= 0)
-		return sent;
-	return 0;
-}
-
-static noinline int
-xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
-		     struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
-{
-	struct net_device *fwd;
-	u32 index = ri->ifindex;
-	int err;
-
-	fwd = dev_get_by_index_rcu(dev_net(dev), index);
-	ri->ifindex = 0;
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
-	if (unlikely(err))
-		goto err;
-
-	_trace_xdp_redirect(dev, xdp_prog, index);
-	return 0;
-err:
-	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
-	return err;
-}
-
 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 			    struct bpf_map *map,
 			    struct xdp_buff *xdp,
@@ -3592,10 +3540,10 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct bpf_map *map = READ_ONCE(ri->map);
 
-	if (likely(map))
-		return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
+	if (unlikely(!map))
+		map = __dev_map_get_default_map(dev);
 
-	return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
+	return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
