[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170221193417.3641224-2-tom@herbertland.com>
Date: Tue, 21 Feb 2017 11:34:10 -0800
From: Tom Herbert <tom@...bertland.com>
To: <davem@...emloft.net>, <netdev@...r.kernel.org>
CC: <kernel-team@...com>
Subject: [PATCH RFC v3 1/8] xdp: Infrastructure to generalize XDP
This patch creates an infrastructure for registering and running code at
XDP hooks in drivers. This extends and generalizes the original XDP/BPF
interface. It abstracts the management and running of BPF programs out of
drivers.
An XDP hook is defined by the xdp_hook structure. A pointer to this
structure is passed into the XDP register function to set up a hook.
The XDP register function mallocs its own xdp_hook structure and copies
the values from the xdp_hook passed in. The register function also saves
the pointer value of the xdp_hook argument; this pointer is used in
subsequent calls to XDP to identify the registered hook.
The interface is defined in net/xdp.h. This includes the definition of
xdp_hook, functions to register and unregister hooks on a device
or individual instances of napi, and xdp_hook_run that is called by
drivers to run the hooks.
Signed-off-by: Tom Herbert <tom@...bertland.com>
---
drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 1 +
include/linux/filter.h | 10 +-
include/linux/netdev_features.h | 3 +-
include/linux/netdevice.h | 16 ++
include/net/xdp.h | 296 ++++++++++++++++++++++
include/trace/events/xdp.h | 31 +++
kernel/bpf/core.c | 1 +
net/core/Makefile | 2 +-
net/core/dev.c | 52 ++--
net/core/filter.c | 1 +
net/core/rtnetlink.c | 14 +-
net/core/xdp.c | 306 +++++++++++++++++++++++
12 files changed, 698 insertions(+), 35 deletions(-)
create mode 100644 include/net/xdp.h
create mode 100644 net/core/xdp.c
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
index 335beb8..d294fb2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -38,6 +38,7 @@
#include <linux/filter.h>
#include <linux/pkt_cls.h>
#include <linux/unistd.h>
+#include <net/xdp.h>
#include "nfp_asm.h"
#include "nfp_bpf.h"
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0c1cc91..53b737f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -434,7 +434,7 @@ struct sk_filter {
struct bpf_prog *prog;
};
-#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
+#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -443,12 +443,6 @@ struct bpf_skb_data_end {
void *data_end;
};
-struct xdp_buff {
- void *data;
- void *data_end;
- void *data_hard_start;
-};
-
/* compute the linear packet data range [data, data_end) which
* will be accessed by cls_bpf, act_bpf and lwt programs
*/
@@ -510,6 +504,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
return BPF_PROG_RUN(prog, skb);
}
+struct xdp_buff;
+
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 9a04195..f22d379 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -71,8 +71,8 @@ enum {
NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */
NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */
-
NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */
+ NETIF_F_XDP_BIT, /* Support XDP interface */
/*
* Add your fresh new feature above and remember to update
@@ -134,6 +134,7 @@ enum {
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
#define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD)
#define NETIF_F_HW_TC __NETIF_F(HW_TC)
+#define NETIF_F_XDP __NETIF_F(XDP)
#define for_each_netdev_feature(mask_addr, bit) \
for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f40f0ab..57ac7ea 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -324,6 +324,7 @@ struct napi_struct {
struct sk_buff *skb;
struct hrtimer timer;
struct list_head dev_list;
+ struct xdp_hook_set __rcu *xdp_hooks;
struct hlist_node napi_hash_node;
unsigned int napi_id;
};
@@ -822,12 +823,25 @@ enum xdp_netdev_command {
* return true if a program is currently attached and running.
*/
XDP_QUERY_PROG,
+ /* Initialize device to use XDP. Called when first XDP program is
+ * registered on a device (including on a NAPI instance).
+ */
+ XDP_MODE_ON,
+ /* XDP is finished on the device. Called after the last XDP hook
+ * has been removed from a device.
+ */
+ XDP_MODE_OFF,
+ /* Check if device is okay with the proposed BPF program to be loaded */
+ XDP_CHECK_BPF_PROG,
+ /* Offload a BPF program to the device */
+ XDP_OFFLOAD_BPF,
};
struct netdev_xdp {
enum xdp_netdev_command command;
union {
/* XDP_SETUP_PROG */
+ /* XDP_CHECK_BPF_PROG */
struct bpf_prog *prog;
/* XDP_QUERY_PROG */
bool prog_attached;
@@ -1668,6 +1682,8 @@ struct net_device {
struct list_head close_list;
struct list_head ptype_all;
struct list_head ptype_specific;
+ struct xdp_hook_set __rcu *xdp_hooks;
+ unsigned int xdp_hook_cnt;
struct {
struct list_head upper;
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 0000000..56b3cf2
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,296 @@
+/*
+ * eXpress Data Path (XDP)
+ *
+ * Copyright (c) 2017 Tom Herbert <tom@...bertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __NET_XDP_H_
+#define __NET_XDP_H_
+
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <linux/static_key.h>
+
+/* XDP data structure.
+ *
+ * Fields:
+ * data - pointer to first byte of data
+ * data_end - pointer to one byte past the last byte of data
+ * data_hard_start - point to first possible byte
+ *
+ * Length is deduced by xdp->data_end - xdp->data.
+ */
+struct xdp_buff {
+ void *data;
+ void *data_end;
+ void *data_hard_start;
+};
+
+typedef unsigned int xdp_hookfn(const void *priv, struct xdp_buff *xdp);
+typedef void xdp_put_privfn(const void *priv);
+
+#define XDP_TAG_SIZE 8 /* Should be at least BPF_TAG_SIZE */
+
+/* xdp_hook struct
+ *
+ * This structure contains the ops and data for an XDP hook. A pointer
+ * to this structure providing the definition of a hook is passed into
+ * the XDP register function to set up a hook. The XDP register function
+ * mallocs its own xdp_hook structure and copies the values from the
+ * xdp_hook definition. The register function also saves the pointer value
+ * of the xdp_hook definition argument; this pointer is used in subsequent
+ * calls to XDP to find or unregister the hook.
+ *
+ * Fields:
+ *
+ * priority - priority for insertion into set. The set is ordered lowest to
+ * highest priority.
+ * priv - private data associated with hook. This is passed as an argument
+ * to the hook function. This is a bpf_prog structure.
+ * put_priv - function call when XDP is done with private data.
+ * def - pointer to the definition of the xdp_hook. The pointer value is saved
+ * as a reference to the instance of the hook loaded (used to find and
+ * unregister a hook).
+ * tag - readable tag for reporting purposes
+ */
+struct xdp_hook {
+ int priority;
+ void __rcu *priv;
+ const struct xdp_hook *def;
+ u8 tag[XDP_TAG_SIZE];
+};
+
+/* xdp_hook_set
+ *
+ * This structure holds a set of XDP hooks in an array of size num. This
+ * structure is used in netdevice to refer to the XDP hooks for a whole
+ * device or in the napi structure to contain the hooks for an individual
+ * RX queue.
+ */
+struct xdp_hook_set {
+ unsigned int num;
+ struct rcu_head rcu;
+ struct xdp_hook hooks[0];
+};
+
+#define XDP_SET_SIZE(_num) (sizeof(struct xdp_hook_set) + ((_num) * \
+ sizeof(struct xdp_hook)))
+
+extern struct xdp_hook xdp_bpf_hook;
+
+extern struct static_key_false xdp_napi_hooks_needed;
+extern struct static_key_false xdp_dev_hooks_needed;
+
+/* Check if XDP hooks are set for a napi or its device.
+ *
+ * Returns true when the device-level static key is enabled and @dev has a
+ * hook set, or when the napi-level static key is enabled and @napi has a
+ * hook set. The hook-set pointers are only tested against NULL here, never
+ * dereferenced, so rcu_access_pointer() is used to read the __rcu fields.
+ */
+static inline bool xdp_hook_run_needed_check(struct net_device *dev,
+					     struct napi_struct *napi)
+{
+	return (static_branch_unlikely(&xdp_dev_hooks_needed) &&
+		rcu_access_pointer(dev->xdp_hooks)) ||
+	       (static_branch_unlikely(&xdp_napi_hooks_needed) &&
+		rcu_access_pointer(napi->xdp_hooks));
+}
+
+/* Run a single hook: fetch its private data under RCU and execute it as a
+ * BPF program with @xdp as the context. Returns the program's return value.
+ */
+static inline int __xdp_run_one_hook(struct xdp_hook *hook,
+				     struct xdp_buff *xdp)
+{
+	struct bpf_prog *prog = rcu_dereference(hook->priv);
+
+	return BPF_PROG_RUN(prog, (void *)xdp);
+}
+
+/* Core function to run the XDP hooks. This must be as fast as possible.
+ *
+ * Hooks are run in array order until one of them returns something other
+ * than XDP_PASS or the set is exhausted. *last_hook is updated to the hook
+ * that ran most recently; it is not written when @hook_set is NULL, in which
+ * case XDP_PASS is returned.
+ */
+static inline int __xdp_hook_run(struct xdp_hook_set *hook_set,
+				 struct xdp_buff *xdp,
+				 struct xdp_hook **last_hook)
+{
+	struct xdp_hook *cur;
+	int idx = 0, verdict;
+
+	if (unlikely(!hook_set))
+		return XDP_PASS;
+
+	/* Entry 0 is run unconditionally; num is only consulted before
+	 * moving on to a later entry.
+	 */
+	do {
+		cur = &hook_set->hooks[idx];
+		verdict = __xdp_run_one_hook(cur, xdp);
+		*last_hook = cur;
+	} while (verdict == XDP_PASS && ++idx < hook_set->num);
+
+	return verdict;
+}
+
+/* Run the XDP hooks for a napi device and return a reference to the last
+ * hook processed. Called from a driver's receive routine. RCU
+ * read lock must be held.
+ *
+ * The hooks on the napi instance run first, then the hooks on napi->dev.
+ * Device hooks are skipped when a napi hook returned something other than
+ * XDP_PASS. Returns the value of the last hook that ran, or XDP_PASS when
+ * no hook ran. *last_hook is only written when at least one hook ran.
+ */
+static inline int xdp_hook_run_ret_last(struct napi_struct *napi,
+ struct xdp_buff *xdp,
+ struct xdp_hook **last_hook)
+{
+ struct net_device *dev = napi->dev;
+ struct xdp_hook_set *hook_set;
+ int ret = XDP_PASS;
+
+ if (static_branch_unlikely(&xdp_napi_hooks_needed)) {
+ /* Run hooks in napi first */
+ hook_set = rcu_dereference(napi->xdp_hooks);
+ ret = __xdp_hook_run(hook_set, xdp, last_hook);
+
+ /* Check for dev hooks now. The test for XDP_PASS having
+ * been returned is only made when dev hooks are needed,
+ * i.e. inside the static branch (this is why we don't
+ * do a fall through).
+ */
+ if (static_branch_unlikely(&xdp_dev_hooks_needed)) {
+ if (ret != XDP_PASS)
+ return ret;
+ hook_set = rcu_dereference(dev->xdp_hooks);
+ ret = __xdp_hook_run(hook_set, xdp, last_hook);
+ }
+ } else if (static_branch_unlikely(&xdp_dev_hooks_needed)) {
+ /* No napi hooks anywhere; only the device hooks need to run */
+ hook_set = rcu_dereference(dev->xdp_hooks);
+ ret = __xdp_hook_run(hook_set, xdp, last_hook);
+ }
+
+ return ret;
+}
+
+/* Run the XDP hooks for a napi device. Called from a driver's receive
+ * routine. RCU read lock must be held.
+ */
+static inline int xdp_hook_run(struct napi_struct *napi,
+ struct xdp_buff *xdp)
+{
+ struct xdp_hook *last_hook;
+
+ return xdp_hook_run_ret_last(napi, xdp, &last_hook);
+}
+
+/* Register an XDP hook
+ * dev: Associated net_device
+ * hook_set: Hook set
+ * base_def: Definition of the hook. The values are copied from this to a
+ * malloc'ed structure. The base_def pointer is saved as a
+ * reference to the hook to manage it
+ * change: Change hook if it exists
+ * dev_hook: Is a hook on a net_device (as opposed to a napi instance)
+ */
+int __xdp_register_hook(struct net_device *dev,
+ struct xdp_hook_set __rcu **hook_set,
+ const struct xdp_hook *base_def,
+ bool change, bool dev_hook);
+
+/* Register an XDP hook on a device */
+static inline int xdp_register_dev_hook(struct net_device *dev,
+ const struct xdp_hook *def)
+{
+ return __xdp_register_hook(dev, &dev->xdp_hooks, def, false, true);
+}
+
+/* Register an XDP hook on a napi instance */
+static inline int xdp_register_napi_hook(struct napi_struct *napi,
+ const struct xdp_hook *def)
+{
+ return __xdp_register_hook(napi->dev, &napi->xdp_hooks, def, false,
+ false);
+}
+
+/* Change an XDP hook.
+ *
+ * - If the hook does not exist (the xdp_hook definition does not match a
+ * hook set on the device), then attempt to register the hook.
+ * - Else, change the private data (priv field in xdp_hook) in the
+ * existing hook to be the new one (in reg). All the other fields in
+ * xdp_hook are ignored in that case.
+ */
+
+/* Change a device XDP hook */
+static inline int xdp_change_dev_hook(struct net_device *dev,
+ const struct xdp_hook *reg)
+{
+ return __xdp_register_hook(dev, &dev->xdp_hooks, reg, true, true);
+}
+
+/* Change a napi XDP hook */
+static inline int xdp_change_napi_hook(struct napi_struct *napi,
+ const struct xdp_hook *reg)
+{
+ return __xdp_register_hook(napi->dev, &napi->xdp_hooks, reg, true,
+ false);
+}
+
+int __xdp_unregister_hook(struct net_device *dev,
+ struct xdp_hook_set __rcu **hook_set,
+ const struct xdp_hook *def, bool dev_hook);
+
+/* Unregister device XDP hook */
+static inline int xdp_unregister_dev_hook(struct net_device *dev,
+ const struct xdp_hook *def)
+{
+ return __xdp_unregister_hook(dev, &dev->xdp_hooks, def, true);
+}
+
+/* Unregister a napi XDP hook */
+static inline int xdp_unregister_napi_hook(struct napi_struct *napi,
+ const struct xdp_hook *def)
+{
+ return __xdp_unregister_hook(napi->dev, &napi->xdp_hooks, def, false);
+}
+
+/* Unregister all XDP hooks associated with a device (both the device hooks
+ * and hooks on all napi instances). This function is called when the netdev
+ * is being freed.
+ */
+void xdp_unregister_all_hooks(struct net_device *dev);
+
+/* Unregister all XDP hooks for a given xdp_hook definition in a net. This walks
+ * all devices in net and napis for each device to unregister matching hooks.
+ * This can be called when a module that had registered some number of hooks
+ * is being unloaded.
+ */
+void xdp_unregister_net_hooks(struct net *net, struct xdp_hook *def);
+
+/* Find a registered device hook.
+ * - If hook is found *ret is set to the values in the registered hook and
+ * true is returned.
+ * - Else false is returned.
+ */
+bool __xdp_find_hook(struct xdp_hook_set **hook_set,
+ const struct xdp_hook *def,
+ struct xdp_hook *ret);
+
+/* Find a device XDP hook. */
+static inline bool xdp_find_dev_hook(struct net_device *dev,
+ const struct xdp_hook *def,
+ struct xdp_hook *ret)
+{
+ return __xdp_find_hook(&dev->xdp_hooks, def, ret);
+}
+
+/* Find a napi XDP hook. */
+static inline bool xdp_find_napi_hook(struct napi_struct *napi,
+ const struct xdp_hook *def,
+ struct xdp_hook *ret)
+{
+ return __xdp_find_hook(&napi->xdp_hooks, def, ret);
+}
+
+int xdp_bpf_check_prog(struct net_device *dev, struct bpf_prog *prog);
+
+static inline void xdp_warn_invalid_action(u32 act)
+{
+ WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+}
+
+#endif /* __NET_XDP_H_ */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1b61357..9ca6306 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -7,6 +7,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/tracepoint.h>
+#include <net/xdp.h>
#define __XDP_ACT_MAP(FN) \
FN(ABORTED) \
@@ -48,6 +49,36 @@ TRACE_EVENT(xdp_exception,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
);
+/* Temporary trace function. This will be renamed to xdp_exception after all
+ * the calling drivers have been patched.
+ */
+TRACE_EVENT(xdp_hook_exception,
+
+ TP_PROTO(const struct net_device *dev,
+ const struct xdp_hook *hook, u32 act),
+
+ TP_ARGS(dev, hook, act),
+
+ TP_STRUCT__entry(
+ __string(name, dev->name)
+ __array(u8, prog_tag, 8)
+ __field(u32, act)
+ ),
+
+ TP_fast_assign(
+ BUILD_BUG_ON(sizeof(__entry->prog_tag) !=
+ sizeof(hook->tag));
+ memcpy(__entry->prog_tag, hook->tag, sizeof(hook->tag));
+ __assign_str(name, dev->name);
+ __entry->act = act;
+ ),
+
+ TP_printk("prog=%s device=%s action=%s",
+ __print_hex_str(__entry->prog_tag, 8),
+ __get_str(name),
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e2..04f2e30 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1412,6 +1412,7 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
#include <linux/bpf_trace.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_hook_exception);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479..52410db 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o xdp.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 05d19c6..81bdf24 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -140,6 +140,8 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
+#include <linux/filter.h>
+#include <net/xdp.h>
#include "net-sysfs.h"
@@ -6615,6 +6617,24 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
+/* Run a BPF/XDP program. RCU read lock must be held */
+static u32 dev_bpf_prog_run_xdp(const void *priv,
+ struct xdp_buff *xdp)
+{
+ const struct bpf_prog *prog = (const struct bpf_prog *)priv;
+
+ return BPF_PROG_RUN(prog, (void *)xdp);
+}
+
+static void dev_bpf_prog_put_xdp(const void *priv)
+{
+ bpf_prog_put((struct bpf_prog *)priv);
+}
+
+struct xdp_hook xdp_bpf_hook = {
+ .priority = 0,
+};
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -6627,7 +6647,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp;
int err;
ASSERT_RTNL();
@@ -6635,29 +6654,27 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
if (!ops->ndo_xdp)
return -EOPNOTSUPP;
if (fd >= 0) {
- if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
-
- err = ops->ndo_xdp(dev, &xdp);
- if (err < 0)
- return err;
- if (xdp.prog_attached)
- return -EBUSY;
- }
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+ xdp_find_dev_hook(dev, &xdp_bpf_hook, NULL))
+ return -EBUSY;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_SETUP_PROG;
- xdp.prog = prog;
+ if (prog) {
+ err = xdp_bpf_check_prog(dev, prog);
+ if (err >= 0) {
+ rcu_assign_pointer(xdp_bpf_hook.priv, prog);
+ err = xdp_register_dev_hook(dev, &xdp_bpf_hook);
+ }
- err = ops->ndo_xdp(dev, &xdp);
- if (err < 0 && prog)
- bpf_prog_put(prog);
+ if (err < 0)
+ bpf_prog_put(prog);
+ } else {
+ err = xdp_unregister_dev_hook(dev, &xdp_bpf_hook);
+ }
return err;
}
@@ -7698,6 +7715,7 @@ void free_netdev(struct net_device *dev)
struct napi_struct *p, *n;
might_sleep();
+ xdp_unregister_all_hooks(dev);
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
kvfree(dev->_rx);
diff --git a/net/core/filter.c b/net/core/filter.c
index e466e004..9a5de43 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -52,6 +52,7 @@
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
+#include <net/xdp.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c4e84c5..b2f5772 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,6 +56,7 @@
#include <net/fib_rules.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include <net/xdp.h>
struct rtnl_link {
rtnl_doit_func doit;
@@ -901,7 +902,7 @@ static size_t rtnl_xdp_size(const struct net_device *dev)
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
nla_total_size(1); /* XDP_ATTACHED */
- if (!dev->netdev_ops->ndo_xdp)
+ if (!(dev->features & NETIF_F_XDP))
return 0;
else
return xdp_size;
@@ -1251,20 +1252,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_xdp xdp_op = {};
struct nlattr *xdp;
int err;
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+ xdp_find_dev_hook(dev, &xdp_bpf_hook, NULL));
if (err)
goto err_cancel;
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 0000000..627671a
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,306 @@
+/*
+ * eXpress Data Path
+ *
+ * Copyright (c) 2017 Tom Herbert <tom@...bertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <net/xdp.h>
+
+DEFINE_STATIC_KEY_FALSE(xdp_dev_hooks_needed);
+EXPORT_SYMBOL(xdp_dev_hooks_needed);
+
+DEFINE_STATIC_KEY_FALSE(xdp_napi_hooks_needed);
+EXPORT_SYMBOL(xdp_napi_hooks_needed);
+
+static DEFINE_MUTEX(xdp_hook_mutex);
+
+/* Register an XDP hook in a hook set, or change an already registered one.
+ *
+ * dev:       device that owns the hook set. Its xdp_hook_cnt is incremented
+ *            on a successful registration, and its ndo_xdp (if any) is sent
+ *            XDP_MODE_ON before the first hook is installed.
+ * xdp_hooks: the RCU-managed hook set pointer to modify
+ * def:       hook definition; the pointer value identifies the hook
+ * change:    when the hook is already present, replace its priv with
+ *            def->priv instead of failing
+ * dev_hook:  true for a device hook set, false for a napi hook set; selects
+ *            which static key is incremented
+ *
+ * Returns 0 on success, -EALREADY if the hook is already registered and
+ * @change is false, -ENOMEM on allocation failure, or the error returned by
+ * the driver for XDP_MODE_ON.
+ */
+int __xdp_register_hook(struct net_device *dev,
+			struct xdp_hook_set __rcu **xdp_hooks,
+			const struct xdp_hook *def,
+			bool change, bool dev_hook)
+{
+	struct xdp_hook_set *new_hooks = NULL, *old_hooks;
+	unsigned int num = 0, targindex = 0;
+	unsigned int i;
+	int err;
+
+	mutex_lock(&xdp_hook_mutex);
+
+	/* Writers are serialized by xdp_hook_mutex, not by an RCU read-side
+	 * critical section.
+	 */
+	old_hooks = rcu_dereference_protected(*xdp_hooks,
+					lockdep_is_held(&xdp_hook_mutex));
+
+	if (old_hooks) {
+		num = old_hooks->num;
+
+		/* Default to appending; the set is ordered lowest to highest
+		 * priority, so the insertion point is the first existing hook
+		 * with a strictly higher priority.
+		 */
+		targindex = num;
+
+		for (i = 0; i < num; i++) {
+			struct xdp_hook *hook = &old_hooks->hooks[i];
+			void *old_priv;
+
+			if (hook->def != def) {
+				if (targindex == num &&
+				    def->priority < hook->priority)
+					targindex = i;
+				continue;
+			}
+
+			if (!change) {
+				/* Already registered */
+				err = -EALREADY;
+				goto err;
+			}
+
+			/* Only allow changing priv field in an existing
+			 * hook.
+			 */
+			old_priv = rcu_dereference_protected(hook->priv,
+					lockdep_is_held(&xdp_hook_mutex));
+			rcu_assign_pointer(hook->priv, def->priv);
+			if (old_priv)
+				bpf_prog_put((struct bpf_prog *)old_priv);
+			goto out;
+		}
+	}
+
+	/* Need to add new hook set. num holds number of entries in the old
+	 * hooks set (zero if hooks set is NULL). targindex holds index to
+	 * insert new hook.
+	 */
+	new_hooks = kzalloc(XDP_SET_SIZE(num + 1), GFP_KERNEL);
+	if (!new_hooks) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	/* Initialize XDP in driver */
+	if (!dev->xdp_hook_cnt && dev->netdev_ops->ndo_xdp) {
+		struct netdev_xdp xdp_op = {};
+
+		xdp_op.command = XDP_MODE_ON;
+		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+		if (err)
+			goto err;
+	}
+
+	/* Copy the old entries around the slot reserved for the new hook.
+	 * Both loops are no-ops when there was no old set (num == 0).
+	 */
+	for (i = 0; i < targindex; i++)
+		new_hooks->hooks[i] = old_hooks->hooks[i];
+	for (; i < num; i++)
+		new_hooks->hooks[i + 1] = old_hooks->hooks[i];
+
+	new_hooks->hooks[targindex] = *def;
+	rcu_assign_pointer(new_hooks->hooks[targindex].priv, def->priv);
+	new_hooks->num = num + 1;
+	rcu_assign_pointer(*xdp_hooks, new_hooks);
+
+	if (old_hooks)
+		kfree_rcu(old_hooks, rcu);
+
+	if (dev_hook)
+		static_branch_inc(&xdp_dev_hooks_needed);
+	else
+		static_branch_inc(&xdp_napi_hooks_needed);
+
+	dev->xdp_hook_cnt++;
+
+out:
+	mutex_unlock(&xdp_hook_mutex);
+
+	return 0;
+
+err:
+	mutex_unlock(&xdp_hook_mutex);
+	kfree(new_hooks);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__xdp_register_hook);
+
+/* Unregister the hook identified by @def from a hook set.
+ *
+ * dev:       device that owns the hook set. Its xdp_hook_cnt is decremented
+ *            and its ndo_xdp (if any) is sent XDP_MODE_OFF once the count
+ *            reaches zero.
+ * xdp_hooks: the RCU-managed hook set pointer to modify
+ * def:       hook definition pointer that was used to register the hook
+ * dev_hook:  true for a device hook set, false for a napi hook set; selects
+ *            which static key is decremented
+ *
+ * Returns 0 when the hook was removed or was not registered (including the
+ * case of an empty hook set), or -ENOMEM if the replacement set could not
+ * be allocated. synchronize_net() is called before returning in all cases.
+ */
+int __xdp_unregister_hook(struct net_device *dev,
+			  struct xdp_hook_set __rcu **xdp_hooks,
+			  const struct xdp_hook *def,
+			  bool dev_hook)
+{
+	struct xdp_hook_set *old_hooks, *new_hooks = NULL;
+	unsigned int i, index;
+	void *priv;
+	int err = 0;
+
+	mutex_lock(&xdp_hook_mutex);
+
+	/* Read the set only once the mutex is held so that it cannot be
+	 * replaced underneath us by a concurrent (un)register.
+	 */
+	old_hooks = rcu_dereference_protected(*xdp_hooks,
+					lockdep_is_held(&xdp_hook_mutex));
+	if (!old_hooks)
+		goto out;
+
+	for (index = 0; index < old_hooks->num; index++) {
+		if (old_hooks->hooks[index].def == def)
+			break;
+	}
+
+	/* Hook is not registered in this set */
+	if (index >= old_hooks->num)
+		goto out;
+
+	if (old_hooks->num > 1) {
+		new_hooks = kzalloc(XDP_SET_SIZE(old_hooks->num - 1),
+				    GFP_KERNEL);
+		if (!new_hooks) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		/* Copy everything except the entry being removed */
+		for (i = 0; i < index; i++)
+			new_hooks->hooks[i] = old_hooks->hooks[i];
+		for (i = index + 1; i < old_hooks->num; i++)
+			new_hooks->hooks[i - 1] = old_hooks->hooks[i];
+
+		new_hooks->num = old_hooks->num - 1;
+	}
+
+	/* Fetch priv before the old set is handed to kfree_rcu() */
+	priv = rcu_dereference_protected(old_hooks->hooks[index].priv,
+					 lockdep_is_held(&xdp_hook_mutex));
+
+	rcu_assign_pointer(*xdp_hooks, new_hooks);
+	kfree_rcu(old_hooks, rcu);
+
+	dev->xdp_hook_cnt--;
+
+	if (dev_hook)
+		static_branch_dec(&xdp_dev_hooks_needed);
+	else
+		static_branch_dec(&xdp_napi_hooks_needed);
+
+	if (priv)
+		bpf_prog_put((struct bpf_prog *)priv);
+
+	if (!dev->xdp_hook_cnt && dev->netdev_ops->ndo_xdp) {
+		struct netdev_xdp xdp_op = {};
+
+		xdp_op.command = XDP_MODE_OFF;
+		dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+	}
+
+out:
+	mutex_unlock(&xdp_hook_mutex);
+	synchronize_net();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(__xdp_unregister_hook);
+
+/* Remove every hook from one hook set.
+ *
+ * For each hook the matching static key and dev->xdp_hook_cnt are
+ * decremented and the reference on its priv program is dropped, as is done
+ * when a single hook is unregistered. The set pointer is cleared and the old
+ * set is freed after an RCU grace period. The driver is sent XDP_MODE_OFF
+ * once dev->xdp_hook_cnt reaches zero.
+ */
+static void __xdp_unregister_hooks(struct net_device *dev,
+				   struct xdp_hook_set __rcu **xdp_hooks,
+				   bool dev_hook)
+{
+	struct xdp_hook_set *old_hooks;
+	unsigned int i;
+
+	mutex_lock(&xdp_hook_mutex);
+
+	old_hooks = rcu_dereference_protected(*xdp_hooks,
+					lockdep_is_held(&xdp_hook_mutex));
+
+	if (!old_hooks) {
+		mutex_unlock(&xdp_hook_mutex);
+		return;
+	}
+
+	for (i = 0; i < old_hooks->num; i++) {
+		if (dev_hook)
+			static_branch_dec(&xdp_dev_hooks_needed);
+		else
+			static_branch_dec(&xdp_napi_hooks_needed);
+		dev->xdp_hook_cnt--;
+	}
+
+	rcu_assign_pointer(*xdp_hooks, NULL);
+
+	/* Drop the program references held by the removed hooks; the old set
+	 * is still valid here because kfree_rcu() has not been called yet.
+	 */
+	for (i = 0; i < old_hooks->num; i++) {
+		void *priv = rcu_dereference_protected(old_hooks->hooks[i].priv,
+					lockdep_is_held(&xdp_hook_mutex));
+
+		if (priv)
+			bpf_prog_put((struct bpf_prog *)priv);
+	}
+
+	if (!dev->xdp_hook_cnt && dev->netdev_ops->ndo_xdp) {
+		struct netdev_xdp xdp_op = {};
+
+		xdp_op.command = XDP_MODE_OFF;
+		dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+	}
+
+	mutex_unlock(&xdp_hook_mutex);
+
+	kfree_rcu(old_hooks, rcu);
+}
+
+/* Drop every XDP hook attached to @dev: first the hook set of each napi
+ * instance on dev->napi_list, then the device's own hook set.
+ */
+void xdp_unregister_all_hooks(struct net_device *dev)
+{
+	struct napi_struct *cur;
+
+	list_for_each_entry(cur, &dev->napi_list, dev_list) {
+		/* napi-level set, so dev_hook is false */
+		__xdp_unregister_hooks(dev, &cur->xdp_hooks, false);
+	}
+
+	/* device-level set last */
+	__xdp_unregister_hooks(dev, &dev->xdp_hooks, true);
+}
+EXPORT_SYMBOL_GPL(xdp_unregister_all_hooks);
+
+/* Unregister the hook identified by @def from every device in @net, both
+ * from each napi instance of the device and from the device itself.
+ *
+ * The unregister calls take a mutex and call synchronize_net(), so the
+ * device list cannot be walked inside an RCU read-side critical section;
+ * the plain netdev iterator is used instead.
+ * NOTE(review): this presumably requires the caller to hold RTNL to keep the
+ * device list stable -- confirm against callers.
+ */
+void xdp_unregister_net_hooks(struct net *net, struct xdp_hook *def)
+{
+	struct net_device *dev;
+	struct napi_struct *napi;
+
+	for_each_netdev(net, dev) {
+		list_for_each_entry(napi, &dev->napi_list, dev_list)
+			xdp_unregister_napi_hook(napi, def);
+
+		xdp_unregister_dev_hook(dev, def);
+	}
+}
+EXPORT_SYMBOL_GPL(xdp_unregister_net_hooks);
+
+/* Look up the hook identified by @def in a hook set.
+ *
+ * The set is walked under rcu_read_lock(). When a hook whose def pointer
+ * equals @def is found, it is copied to *ret (if @ret is not NULL) and true
+ * is returned. Returns false when the set is empty or holds no such hook.
+ */
+bool __xdp_find_hook(struct xdp_hook_set __rcu **xdp_hooks,
+		     const struct xdp_hook *def,
+		     struct xdp_hook *ret)
+{
+	struct xdp_hook_set *set;
+	bool found = false;
+	int i;
+
+	rcu_read_lock();
+
+	set = rcu_dereference(*xdp_hooks);
+
+	for (i = 0; set && i < set->num; i++) {
+		struct xdp_hook *cur = &set->hooks[i];
+
+		if (cur->def == def) {
+			if (ret)
+				*ret = *cur;
+			found = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(__xdp_find_hook);
+
+/* Ask the driver whether it accepts @prog.
+ *
+ * Sends XDP_CHECK_BPF_PROG with @prog to the device's ndo_xdp and returns
+ * its result, or -EOPNOTSUPP when the device has no ndo_xdp.
+ */
+int xdp_bpf_check_prog(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct netdev_xdp check_op = {};
+
+	if (!dev->netdev_ops->ndo_xdp)
+		return -EOPNOTSUPP;
+
+	check_op.command = XDP_CHECK_BPF_PROG;
+	check_op.prog = prog;
+
+	return dev->netdev_ops->ndo_xdp(dev, &check_op);
+}
+EXPORT_SYMBOL_GPL(xdp_bpf_check_prog);
--
2.9.3
Powered by blists - more mailing lists