[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180424143923.26519-3-toshiaki.makita1@gmail.com>
Date: Tue, 24 Apr 2018 23:39:16 +0900
From: Toshiaki Makita <toshiaki.makita1@...il.com>
To: netdev@...r.kernel.org
Cc: Toshiaki Makita <makita.toshiaki@....ntt.co.jp>
Subject: [PATCH RFC 2/9] veth: Add driver XDP
From: Toshiaki Makita <makita.toshiaki@....ntt.co.jp>
This is basic implementation of veth driver XDP.
Incoming packets are sent from the peer veth device in the form of skb,
so this is generally doing the same thing as generic XDP.
This itself is not so useful, but a starting point to implement other
useful veth XDP features like TX and REDIRECT.
Signed-off-by: Toshiaki Makita <makita.toshiaki@....ntt.co.jp>
---
drivers/net/veth.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 205 insertions(+), 5 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a69ad39ee57e..9c4197306716 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -19,10 +19,15 @@
#include <net/xfrm.h>
#include <linux/veth.h>
#include <linux/module.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf_trace.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"
+#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+
struct pcpu_vstats {
u64 packets;
u64 bytes;
@@ -30,9 +35,11 @@ struct pcpu_vstats {
};
struct veth_priv {
+ struct bpf_prog __rcu *xdp_prog;
struct net_device __rcu *peer;
atomic64_t dropped;
unsigned requested_headroom;
+ struct xdp_rxq_info xdp_rxq;
};
/*
@@ -98,6 +105,25 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_link_ksettings = veth_get_link_ksettings,
};
+/* general routines */
+
+static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev,
+ struct sk_buff *skb);
+
+static int veth_xdp_rx(struct net_device *dev, struct sk_buff *skb)
+{
+ skb = veth_xdp_rcv_skb(dev, skb);
+ if (!skb)
+ return NET_RX_DROP;
+
+ return netif_rx(skb);
+}
+
+static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ return __dev_forward_skb(dev, skb) ?: veth_xdp_rx(dev, skb);
+}
+
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -111,7 +137,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
goto drop;
}
- if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+ if (likely(veth_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
u64_stats_update_begin(&stats->syncp);
@@ -126,10 +152,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-/*
- * general routines
- */
-
static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -179,19 +201,152 @@ static void veth_set_multicast_list(struct net_device *dev)
{
}
+static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
+ int buflen)
+{
+ struct sk_buff *skb;
+
+ if (!buflen) {
+ buflen = SKB_DATA_ALIGN(headroom + len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ skb = build_skb(head, buflen);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, headroom);
+ skb_put(skb, len);
+
+ return skb;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ u32 pktlen, headroom, act, metalen;
+ int size, mac_len, delta, off;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+ void *orig_data;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(priv->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ goto out;
+ }
+
+ mac_len = skb->data - skb_mac_header(skb);
+ pktlen = skb->len + mac_len;
+ size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ if (size > PAGE_SIZE)
+ goto drop;
+
+ headroom = skb_headroom(skb) - mac_len;
+ if (skb_shared(skb) || skb_head_is_locked(skb) ||
+ skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+ struct sk_buff *nskb;
+ void *head, *start;
+ struct page *page;
+ int head_off;
+
+ page = alloc_page(GFP_ATOMIC);
+ if (!page)
+ goto drop;
+
+ head = page_address(page);
+ start = head + VETH_XDP_HEADROOM;
+ if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
+ page_frag_free(head);
+ goto drop;
+ }
+
+ nskb = veth_build_skb(head,
+ VETH_XDP_HEADROOM + mac_len, skb->len,
+ PAGE_SIZE);
+ if (!nskb) {
+ page_frag_free(head);
+ goto drop;
+ }
+
+ skb_copy_header(nskb, skb);
+ head_off = skb_headroom(nskb) - skb_headroom(skb);
+ skb_headers_offset_update(nskb, head_off);
+ dev_consume_skb_any(skb);
+ skb = nskb;
+ }
+
+ xdp.data_hard_start = skb->head;
+ xdp.data = skb_mac_header(skb);
+ xdp.data_end = xdp.data + pktlen;
+ xdp.data_meta = xdp.data;
+ xdp.rxq = &priv->xdp_rxq;
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ case XDP_ABORTED:
+ trace_xdp_exception(dev, xdp_prog, act);
+ case XDP_DROP:
+ goto drop;
+ }
+ rcu_read_unlock();
+
+ delta = orig_data - xdp.data;
+ off = mac_len + delta;
+ if (off > 0)
+ __skb_push(skb, off);
+ else if (off < 0)
+ __skb_pull(skb, -off);
+ skb->mac_header -= delta;
+ skb->protocol = eth_type_trans(skb, dev);
+
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
+out:
+ return skb;
+drop:
+ rcu_read_unlock();
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
static int veth_open(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
+ int err;
if (!peer)
return -ENOTCONN;
+ err = xdp_rxq_info_reg(&priv->xdp_rxq, dev, 0);
+ if (err < 0)
+ return err;
+
+ err = xdp_rxq_info_reg_mem_model(&priv->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0)
+ goto err_reg_mem;
+
if (peer->flags & IFF_UP) {
netif_carrier_on(dev);
netif_carrier_on(peer);
}
+
return 0;
+err_reg_mem:
+ xdp_rxq_info_unreg(&priv->xdp_rxq);
+
+ return err;
}
static int veth_close(struct net_device *dev)
@@ -203,6 +358,8 @@ static int veth_close(struct net_device *dev)
if (peer)
netif_carrier_off(peer);
+ xdp_rxq_info_unreg(&priv->xdp_rxq);
+
return 0;
}
@@ -276,6 +433,48 @@ static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
rcu_read_unlock();
}
+static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+
+ old_prog = rtnl_dereference(priv->xdp_prog);
+
+ rcu_assign_pointer(priv->xdp_prog, prog);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static u32 veth_xdp_query(struct net_device *dev)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+
+ xdp_prog = rtnl_dereference(priv->xdp_prog);
+ if (xdp_prog)
+ return xdp_prog->aux->id;
+
+ return 0;
+}
+
+static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return veth_xdp_set(dev, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = veth_xdp_query(dev);
+ xdp->prog_attached = !!xdp->prog_id;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops veth_netdev_ops = {
.ndo_init = veth_dev_init,
.ndo_open = veth_open,
@@ -290,6 +489,7 @@ static const struct net_device_ops veth_netdev_ops = {
.ndo_get_iflink = veth_get_iflink,
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = veth_set_rx_headroom,
+ .ndo_bpf = veth_xdp,
};
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
--
2.14.3
Powered by blists - more mailing lists