[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1538220482-16129-3-git-send-email-ilias.apalodimas@linaro.org>
Date: Sat, 29 Sep 2018 14:28:02 +0300
From: Ilias Apalodimas <ilias.apalodimas@...aro.org>
To: netdev@...r.kernel.org, jaswinder.singh@...aro.org
Cc: ard.biesheuvel@...aro.org, masami.hiramatsu@...aro.org,
arnd@...db.de, bjorn.topel@...el.com, magnus.karlsson@...el.com,
brouer@...hat.com, daniel@...earbox.net, ast@...nel.org,
jesus.sanchez-palencia@...el.com, vinicius.gomes@...el.com,
makita.toshiaki@....ntt.co.jp,
Ilias Apalodimas <ilias.apalodimas@...aro.org>
Subject: [net-next, PATCH 2/2, v3] net: socionext: add XDP support
Add basic XDP support. The interface only supports 1 Tx queue for now
so locking is introduced on the Tx queue if XDP is enabled to make sure
.ndo_start_xmit and .ndo_xdp_xmit won't corrupt Tx ring
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@...aro.org>
---
drivers/net/ethernet/socionext/netsec.c | 345 +++++++++++++++++++++++++++++---
1 file changed, 318 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 8f788a1..2b29363 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -9,6 +9,9 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
@@ -238,6 +241,15 @@
#define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000)
+#define NETSEC_XDP_PASS 0
+#define NETSEC_XDP_CONSUMED BIT(0)
+#define NETSEC_XDP_TX BIT(1)
+#define NETSEC_XDP_REDIR BIT(2)
+#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
+
+#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+ NET_IP_ALIGN)
+
enum ring_id {
NETSEC_RING_TX = 0,
NETSEC_RING_RX
@@ -256,11 +268,16 @@ struct netsec_desc_ring {
void *vaddr;
u16 pkt_cnt;
u16 head, tail;
+ u16 xdp_xmit; /* netsec_xdp_xmit packets */
+ bool is_xdp;
+ struct xdp_rxq_info xdp_rxq;
+ spinlock_t lock; /* XDP tx queue locking */
};
struct netsec_priv {
struct netsec_desc_ring desc_ring[NETSEC_RING_MAX];
struct ethtool_coalesce et_coalesce;
+ struct bpf_prog *xdp_prog;
spinlock_t reglock; /* protect reg access */
struct napi_struct napi;
phy_interface_t phy_interface;
@@ -297,6 +314,8 @@ struct netsec_rx_pkt_info {
};
static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num);
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+ struct xdp_buff *xdp);
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
dma_addr_t *dma_addr, u16 *len);
@@ -590,6 +609,8 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
unsigned int pkts, bytes;
+ if (dring->is_xdp)
+ spin_lock(&dring->lock);
dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
if (dring->pkt_cnt < budget)
@@ -615,13 +636,23 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
DMA_TO_DEVICE);
- if (eop) {
- pkts++;
+
+ if (!eop) {
+ *desc = (struct netsec_desc){};
+ continue;
+ }
+
+ if (!desc->skb) {
+ skb_free_frag(desc->addr);
+ } else {
bytes += desc->skb->len;
dev_kfree_skb(desc->skb);
}
+ pkts++;
*desc = (struct netsec_desc){};
}
+ if (dring->is_xdp)
+ spin_unlock(&dring->lock);
dring->pkt_cnt -= budget;
priv->ndev->stats.tx_packets += budget;
@@ -656,11 +687,30 @@ static void netsec_adv_desc(u16 *idx)
*idx = 0;
}
+static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts)
+{
+ if (likely(pkts))
+ netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts);
+}
+
+static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res,
+ u16 pkts)
+{
+ if (xdp_res & NETSEC_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & NETSEC_XDP_TX)
+ netsec_xdp_ring_tx_db(priv, pkts);
+}
+
static int netsec_process_rx(struct netsec_priv *priv, int budget)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+ struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
struct net_device *ndev = priv->ndev;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ u16 xdp_xmit = 0;
+ u32 xdp_act = 0;
int done = 0;
while (done < budget) {
@@ -668,8 +718,10 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
struct netsec_desc *desc = &dring->desc[idx];
struct netsec_rx_pkt_info rpi;
- u16 pkt_len, desc_len;
+ u32 xdp_result = XDP_PASS;
dma_addr_t dma_handle;
+ u16 pkt_len, desc_len;
+ struct xdp_buff xdp;
void *buf_addr;
if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD))
@@ -706,7 +758,23 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
DMA_FROM_DEVICE);
prefetch(desc->addr);
- skb = build_skb(desc->addr, desc->len);
+ xdp.data_hard_start = desc->addr;
+ xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + pkt_len;
+ xdp.rxq = &dring->xdp_rxq;
+
+ if (xdp_prog) {
+ xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
+ if (xdp_result != NETSEC_XDP_PASS) {
+ xdp_act |= xdp_result;
+ if (xdp_result == NETSEC_XDP_TX)
+ xdp_xmit++;
+ goto next;
+ }
+ }
+
+ skb = build_skb(xdp.data_hard_start, desc->len);
if (unlikely(!skb)) {
dma_unmap_single(priv->dev, dma_handle, desc_len,
DMA_TO_DEVICE);
@@ -715,30 +783,35 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
"rx failed to alloc skb\n");
break;
}
- dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
- DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
- /* Update the descriptor with fresh buffers */
- desc->len = desc_len;
- desc->dma_addr = dma_handle;
- desc->addr = buf_addr;
-
- skb_put(skb, pkt_len);
+ skb_reserve(skb, xdp.data - xdp.data_hard_start);
+ skb_put(skb, xdp.data_end - xdp.data);
skb->protocol = eth_type_trans(skb, priv->ndev);
if (priv->rx_cksum_offload_flag &&
rpi.rx_cksum_result == NETSEC_RX_CKSUM_OK)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
+next:
+ if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
+ xdp_result & NETSEC_XDP_RX_OK) {
ndev->stats.rx_packets++;
- ndev->stats.rx_bytes += pkt_len;
+ ndev->stats.rx_bytes += xdp.data_end - xdp.data;
}
+ dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+
+ /* Update the descriptor with fresh buffers */
+ desc->len = desc_len;
+ desc->dma_addr = dma_handle;
+ desc->addr = buf_addr;
netsec_rx_fill(priv, idx, 1);
netsec_adv_desc(&dring->tail);
}
+ netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
+
return done;
}
@@ -805,7 +878,9 @@ static void netsec_set_tx_de(struct netsec_priv *priv,
de->data_buf_addr_lw = lower_32_bits(desc->dma_addr);
de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len;
de->attr = attr;
- dma_wmb();
+ /* under spin_lock if using XDP */
+ if (!dring->is_xdp)
+ dma_wmb();
dring->desc[idx] = *desc;
dring->desc[idx].skb = skb;
@@ -824,6 +899,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
u16 tso_seg_len = 0;
int filled;
+ if (dring->is_xdp)
+ spin_lock_bh(&dring->lock);
/* differentiate between full/emtpy ring */
if (dring->head >= dring->tail)
filled = dring->head - dring->tail;
@@ -831,6 +908,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
filled = dring->head + DESC_NUM - dring->tail;
if (DESC_NUM - filled < 2) { /* if less than 2 available */
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
netif_err(priv, drv, priv->ndev, "%s: TxQFull!\n", __func__);
netif_stop_queue(priv->ndev);
dma_wmb();
@@ -864,6 +943,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
tx_desc.dma_addr = dma_map_single(priv->dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) {
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
netif_err(priv, drv, priv->ndev,
"%s: DMA mapping failed\n", __func__);
ndev->stats.tx_dropped++;
@@ -877,6 +958,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
netdev_sent_queue(priv->ndev, skb->len);
netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb);
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */
return NETDEV_TX_OK;
@@ -891,6 +974,9 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
if (!dring->vaddr || !dring->desc)
return;
+ if (xdp_rxq_info_is_reg(&dring->xdp_rxq))
+ xdp_rxq_info_unreg(&dring->xdp_rxq);
+
for (idx = 0; idx < DESC_NUM; idx++) {
desc = &dring->desc[idx];
if (!desc->addr)
@@ -930,24 +1016,24 @@ static void netsec_free_dring(struct netsec_priv *priv, int id)
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
dma_addr_t *dma_handle, u16 *desc_len)
{
- size_t len = priv->ndev->mtu + ETH_HLEN + 2 * VLAN_HLEN + NET_SKB_PAD +
- NET_IP_ALIGN;
+ size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ size_t payload_len = priv->ndev->mtu + ETH_HLEN + 2 * VLAN_HLEN;
dma_addr_t mapping;
void *buf;
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
+ total_len += SKB_DATA_ALIGN(payload_len + NETSEC_RXBUF_HEADROOM);
- buf = napi_alloc_frag(len);
+ buf = napi_alloc_frag(total_len);
if (!buf)
return NULL;
- mapping = dma_map_single(priv->dev, buf, len, DMA_FROM_DEVICE);
+ mapping = dma_map_single(priv->dev, buf + NETSEC_RXBUF_HEADROOM,
+ payload_len, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(priv->dev, mapping)))
goto err_out;
*dma_handle = mapping;
- *desc_len = len;
+ *desc_len = total_len;
return buf;
@@ -990,10 +1076,27 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
return -ENOMEM;
}
+static void netsec_setup_tx_dring(struct netsec_priv *priv)
+{
+ struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+ struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+
+ if (xdp_prog)
+ dring->is_xdp = true;
+ else
+ dring->is_xdp = false;
+}
+
static int netsec_setup_rx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
- int i;
+ struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+ int i, err;
+
+ if (xdp_prog)
+ dring->is_xdp = true;
+ else
+ dring->is_xdp = false;
for (i = 0; i < DESC_NUM; i++) {
struct netsec_desc *desc = &dring->desc[i];
@@ -1002,20 +1105,29 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
u16 len;
buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
- if (!buf) {
- netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+ if (!buf)
goto err_out;
- }
desc->dma_addr = dma_handle;
desc->addr = buf;
desc->len = len;
}
netsec_rx_fill(priv, 0, DESC_NUM);
+ err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0);
+ if (err)
+ goto err_out;
+
+ err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err) {
+ xdp_rxq_info_unreg(&dring->xdp_rxq);
+ goto err_out;
+ }
return 0;
err_out:
+ netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
return -ENOMEM;
}
@@ -1290,6 +1402,7 @@ static int netsec_netdev_open(struct net_device *ndev)
pm_runtime_get_sync(priv->dev);
+ netsec_setup_tx_dring(priv);
ret = netsec_setup_rx_dring(priv);
if (ret) {
netif_err(priv, probe, priv->ndev,
@@ -1387,6 +1500,9 @@ static int netsec_netdev_init(struct net_device *ndev)
if (ret)
goto err2;
+ spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock);
+ spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock);
+
return 0;
err2:
netsec_free_dring(priv, NETSEC_RING_RX);
@@ -1419,6 +1535,179 @@ static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr,
return phy_mii_ioctl(ndev->phydev, ifr, cmd);
}
+/* The current driver only supports 1 Txq, this should run under spin_lock() */
+static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
+ struct xdp_frame *xdpf)
+
+{
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ struct netsec_tx_pkt_ctrl tx_ctrl = {};
+ struct netsec_desc tx_desc;
+ dma_addr_t dma_handle;
+ u16 filled;
+
+ if (tx_ring->head >= tx_ring->tail)
+ filled = tx_ring->head - tx_ring->tail;
+ else
+ filled = tx_ring->head + DESC_NUM - tx_ring->tail;
+
+ if (DESC_NUM - filled <= 1)
+ return NETSEC_XDP_CONSUMED;
+
+ dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(priv->dev, dma_handle))
+ return NETSEC_XDP_CONSUMED;
+
+ tx_ctrl.cksum_offload_flag = false;
+ tx_ctrl.tcp_seg_offload_flag = false;
+ tx_ctrl.tcp_seg_len = 0;
+
+ tx_desc.dma_addr = dma_handle;
+ tx_desc.addr = xdpf->data;
+ tx_desc.len = xdpf->len;
+
+ netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, NULL);
+
+ return NETSEC_XDP_TX;
+}
+
+static int netsec_xdp_xmit(struct net_device *ndev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct netsec_priv *priv = netdev_priv(ndev);
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ int drops = 0;
+ int i;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ spin_lock(&tx_ring->lock);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = netsec_xdp_queue_one(priv, xdpf);
+ if (err != NETSEC_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ } else {
+ tx_ring->xdp_xmit++;
+ }
+ }
+ spin_unlock(&tx_ring->lock);
+
+ if (unlikely(flags & XDP_XMIT_FLUSH)) {
+ netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit);
+ tx_ring->xdp_xmit = 0;
+ }
+
+ return n - drops;
+}
+
+static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
+{
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+ u32 ret;
+
+ if (unlikely(!xdpf))
+ return NETSEC_XDP_CONSUMED;
+
+ spin_lock(&tx_ring->lock);
+ ret = netsec_xdp_queue_one(priv, xdpf);
+ spin_unlock(&tx_ring->lock);
+
+ return ret;
+}
+
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ u32 ret = NETSEC_XDP_PASS;
+ int err;
+ u32 act;
+
+ rcu_read_lock();
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ ret = NETSEC_XDP_PASS;
+ break;
+ case XDP_TX:
+ ret = netsec_xdp_xmit_back(priv, xdp);
+ if (ret != NETSEC_XDP_TX)
+ xdp_return_buff(xdp);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(priv->ndev, xdp, prog);
+ if (!err) {
+ ret = NETSEC_XDP_REDIR;
+ } else {
+ ret = NETSEC_XDP_CONSUMED;
+ xdp_return_buff(xdp);
+ }
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(priv->ndev, prog, act);
+ /* fall through -- handle aborts by dropping packet */
+ case XDP_DROP:
+ ret = NETSEC_XDP_CONSUMED;
+ xdp_return_buff(xdp);
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = priv->ndev;
+ struct bpf_prog *old_prog;
+
+ /* For now just support only the usual MTU sized frames */
+ if (prog && dev->mtu > 1500) {
+ NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+ return -EOPNOTSUPP;
+ }
+
+ if (netif_running(dev))
+ netsec_netdev_stop(dev);
+
+ /* Detach old prog, if any */
+ old_prog = xchg(&priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (netif_running(dev))
+ netsec_netdev_open(dev);
+
+ return 0;
+}
+
+static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp)
+{
+ struct netsec_priv *priv = netdev_priv(ndev);
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return netsec_xdp_setup(priv, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops netsec_netdev_ops = {
.ndo_init = netsec_netdev_init,
.ndo_uninit = netsec_netdev_uninit,
@@ -1429,6 +1718,8 @@ static const struct net_device_ops netsec_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_do_ioctl = netsec_netdev_ioctl,
+ .ndo_xdp_xmit = netsec_xdp_xmit,
+ .ndo_bpf = netsec_xdp,
};
static int netsec_of_probe(struct platform_device *pdev,
--
2.7.4
Powered by blists - more mailing lists