Message-Id: <20180131135356.19134-7-bjorn.topel@gmail.com>
Date: Wed, 31 Jan 2018 14:53:38 +0100
From: Björn Töpel <bjorn.topel@...il.com>
To: bjorn.topel@...il.com, magnus.karlsson@...el.com,
alexander.h.duyck@...el.com, alexander.duyck@...il.com,
john.fastabend@...il.com, ast@...com, brouer@...hat.com,
willemdebruijn.kernel@...il.com, daniel@...earbox.net,
netdev@...r.kernel.org
Cc: Björn Töpel <bjorn.topel@...el.com>,
michael.lundkvist@...csson.com, jesse.brandeburg@...el.com,
anjali.singhai@...el.com, jeffrey.b.shaw@...el.com,
ferruh.yigit@...el.com, qi.z.zhang@...el.com
Subject: [RFC PATCH 06/24] net: wire up xsk support in the XDP_REDIRECT path
From: Björn Töpel <bjorn.topel@...el.com>
In this commit we add support for XDP programs to redirect frames to a
bound AF_XDP socket.
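
As an illustration only (not part of this patch), an XDP program that
steers every received frame to the bound socket could look like the
sketch below. The program-side declaration of bpf_xdpsk_redirect() and
the SEC()/loader conventions are assumptions on my part; this patch
only wires up the kernel side of the redirect.

  /* Sketch of an XDP program using the xsk redirect helper.
   * Assumptions: bpf_xdpsk_redirect() is declared for program use
   * (e.g. via a bpf_helpers.h shim) and the object is loaded with the
   * usual libbpf/iproute2 conventions.
   */
  #include <linux/bpf.h>
  #include "bpf_helpers.h"

  SEC("xdp_sock")
  int xdp_sock_prog(struct xdp_md *ctx)
  {
          /* Flag the frame for AF_XDP delivery; the socket lookup and
           * the actual receive happen later, in xdp_do_redirect()/
           * xsk_rcv() for the driver path and xsk_generic_rcv() for
           * the generic path.
           */
          return bpf_xdpsk_redirect();
  }

  char _license[] SEC("license") = "GPL";

In the generic path the skb is then consumed by xsk_generic_rcv()
instead of being re-queued for transmit.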
Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
---
include/linux/filter.h | 2 +-
include/net/xdp_sock.h | 28 ++++++++++++++++++++
net/core/dev.c | 28 +++++++++++---------
net/core/filter.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++----
4 files changed, 111 insertions(+), 19 deletions(-)
create mode 100644 include/net/xdp_sock.h
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..43cacfe2cc2a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -747,7 +747,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* This does not appear to be a real limitation for existing software.
*/
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *prog);
+ struct xdp_buff *xdp, struct bpf_prog *prog);
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *prog);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
new file mode 100644
index 000000000000..132489fe0e70
--- /dev/null
+++ b/include/net/xdp_sock.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_AF_XDP_SOCK_H
+#define _LINUX_AF_XDP_SOCK_H
+
+struct xdp_sock;
+struct xdp_buff;
+
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_buff *xdp);
+struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xsk);
+#else
+static inline int xsk_generic_rcv(struct xdp_buff *xdp)
+{
+ return -ENOTSUPP;
+}
+
+static inline struct xdp_sock *xsk_rcv(struct xdp_sock *xsk,
+ struct xdp_buff *xdp)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void xsk_flush(struct xdp_sock *xsk)
+{
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_AF_XDP_SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..94d2950fc33d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3951,11 +3951,11 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
}
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
- struct xdp_buff xdp;
void *orig_data;
int hlen, off;
u32 mac_len;
@@ -3991,18 +3991,18 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
*/
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
- xdp.data = skb->data - mac_len;
- xdp.data_meta = xdp.data;
- xdp.data_end = xdp.data + hlen;
- xdp.data_hard_start = skb->data - skb_headroom(skb);
- orig_data = xdp.data;
+ xdp->data = skb->data - mac_len;
+ xdp->data_meta = xdp->data;
+ xdp->data_end = xdp->data + hlen;
+ xdp->data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp->data;
rxqueue = netif_get_rxqueue(skb);
- xdp.rxq = &rxqueue->xdp_rxq;
+ xdp->rxq = &rxqueue->xdp_rxq;
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
- off = xdp.data - orig_data;
+ off = xdp->data - orig_data;
if (off > 0)
__skb_pull(skb, off);
else if (off < 0)
@@ -4015,7 +4015,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
__skb_push(skb, mac_len);
break;
case XDP_PASS:
- metalen = xdp.data - xdp.data_meta;
+ metalen = xdp->data - xdp->data_meta;
if (metalen)
skb_metadata_set(skb, metalen);
break;
@@ -4065,17 +4065,19 @@ static struct static_key generic_xdp_needed __read_mostly;
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
{
if (xdp_prog) {
- u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+ struct xdp_buff xdp;
+ u32 act;
int err;
+ act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
err = xdp_do_generic_redirect(skb->dev, skb,
- xdp_prog);
+ &xdp, xdp_prog);
if (err)
goto out_redir;
- /* fallthru to submit skb */
+ break;
case XDP_TX:
generic_xdp_tx(skb, xdp_prog);
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index aedf57489cb5..eab47173bc9e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -57,6 +57,7 @@
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -1809,8 +1810,8 @@ struct redirect_info {
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
- bool to_xsk;
- /* XXX cache xsk socket here, to avoid lookup? */
+ bool xsk;
+ struct xdp_sock *xsk_to_flush;
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2575,6 +2576,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
void xdp_do_flush_map(void)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct xdp_sock *xsk = ri->xsk_to_flush;
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
@@ -2590,6 +2592,10 @@ void xdp_do_flush_map(void)
break;
}
}
+
+ ri->xsk_to_flush = NULL;
+ if (xsk)
+ xsk_flush(xsk);
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
@@ -2611,6 +2617,29 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
return (unsigned long)xdp_prog->aux != aux;
}
+static int xdp_do_xsk_redirect(struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct xdp_sock *xsk;
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+ ri->map_owner = 0;
+ ri->xsk = false;
+
+ xsk = xsk_rcv(ri->xsk_to_flush, xdp);
+ if (IS_ERR(xsk)) {
+ _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1,
+ PTR_ERR(xsk));
+ return PTR_ERR(xsk);
+ }
+
+ ri->xsk_to_flush = xsk;
+ _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1);
+
+ return 0;
+}
+
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
@@ -2624,6 +2653,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
+ ri->xsk = false;
if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
@@ -2659,6 +2689,9 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
u32 index = ri->ifindex;
int err;
+ if (ri->xsk)
+ return xdp_do_xsk_redirect(xdp, xdp_prog);
+
if (ri->map)
return xdp_do_redirect_map(dev, xdp, xdp_prog);
@@ -2681,6 +2714,30 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
+static int xdp_do_generic_xsk_redirect(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ int err;
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+ ri->map_owner = 0;
+ ri->xsk = false;
+
+ err = xsk_generic_rcv(xdp);
+ if (err) {
+ _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1, err);
+ return err;
+ }
+
+ consume_skb(skb);
+ _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1); /* XXX fix tracing to support xsk */
+
+ return 0;
+}
+
static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
{
unsigned int len;
@@ -2709,7 +2766,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
- ri->to_xsk = false;
+ ri->xsk = false;
if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
@@ -2733,6 +2790,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
}
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ generic_xdp_tx(skb, xdp_prog);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
@@ -2740,13 +2798,16 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
}
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+ struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
u32 index = ri->ifindex;
struct net_device *fwd;
int err = 0;
+ if (ri->xsk)
+ return xdp_do_generic_xsk_redirect(skb, xdp, xdp_prog);
+
if (ri->map)
return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
@@ -2762,6 +2823,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
skb->dev = fwd;
_trace_xdp_redirect(dev, xdp_prog, index);
+ generic_xdp_tx(skb, xdp_prog);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
@@ -2828,7 +2890,7 @@ BPF_CALL_0(bpf_xdpsk_redirect)
* and XDP_ABORTED on failure? Also, then we can populate xsk
* in ri, and don't have to do the lookup multiple times.
*/
- ri->to_xsk = true;
+ ri->xsk = true;
return XDP_REDIRECT;
}
--
2.14.1