[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190524110511.GA27885@apalos>
Date: Fri, 24 May 2019 14:05:11 +0300
From: Ilias Apalodimas <ilias.apalodimas@...aro.org>
To: Ivan Khoronzhuk <ivan.khoronzhuk@...aro.org>
Cc: grygorii.strashko@...com, hawk@...nel.org, davem@...emloft.net,
ast@...nel.org, linux-kernel@...r.kernel.org,
linux-omap@...r.kernel.org, xdp-newbies@...r.kernel.org,
netdev@...r.kernel.org, daniel@...earbox.net,
jakub.kicinski@...ronome.com, john.fastabend@...il.com
Subject: Re: [PATCH net-next 3/3] net: ethernet: ti: cpsw: add XDP support
On Thu, May 23, 2019 at 09:20:35PM +0300, Ivan Khoronzhuk wrote:
> Add XDP support based on rx page_pool allocator, one frame per page.
> Page pool allocator is used with assumption that only one rx_handler
> is running simultaneously. DMA map/unmap is reused from page pool
> despite there is no need to map whole page.
>
> Due to specific of cpsw, the same TX/RX handler can be used by 2
> network devices, so special fields in buffer are added to identify
> an interface the frame is destined to. Thus XDP works for both
> interfaces, that allows to test xdp redirect between two interfaces
> easily.
>
> XDP prog is common for all channels till appropriate changes are added
> in XDP infrastructure.
>
> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@...aro.org>
> ---
> drivers/net/ethernet/ti/Kconfig | 1 +
> drivers/net/ethernet/ti/cpsw.c | 555 ++++++++++++++++++++++---
> drivers/net/ethernet/ti/cpsw_ethtool.c | 53 +++
> drivers/net/ethernet/ti/cpsw_priv.h | 7 +
> 4 files changed, 554 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
> index bd05a977ee7e..3cb8c5214835 100644
> --- a/drivers/net/ethernet/ti/Kconfig
> +++ b/drivers/net/ethernet/ti/Kconfig
> @@ -50,6 +50,7 @@ config TI_CPSW
> depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
> select TI_DAVINCI_MDIO
> select MFD_SYSCON
> + select PAGE_POOL
> select REGMAP
> ---help---
> This driver supports TI's CPSW Ethernet Switch.
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index 87a600aeee4a..274e6b64ea9e 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -31,6 +31,10 @@
> #include <linux/if_vlan.h>
> #include <linux/kmemleak.h>
> #include <linux/sys_soc.h>
> +#include <net/page_pool.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> +#include <linux/filter.h>
>
> #include <linux/pinctrl/consumer.h>
> #include <net/pkt_cls.h>
> @@ -60,6 +64,10 @@ static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT;
> module_param(descs_pool_size, int, 0444);
> MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
>
> +/* The buf includes headroom compatible with both skb and xdpf */
> +#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN)
> +#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long))
> +
> #define for_each_slave(priv, func, arg...) \
> do { \
> struct cpsw_slave *slave; \
> @@ -74,6 +82,8 @@ MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
> (func)(slave++, ##arg); \
> } while (0)
>
> +#define CPSW_XMETA_OFFSET ALIGN(sizeof(struct xdp_frame), sizeof(long))
> +
> static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
> __be16 proto, u16 vid);
>
> @@ -337,24 +347,58 @@ void cpsw_intr_disable(struct cpsw_common *cpsw)
> return;
> }
>
> +static int cpsw_is_xdpf_handle(void *handle)
> +{
> + return (unsigned long)handle & BIT(0);
> +}
> +
> +static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf)
> +{
> + return (void *)((unsigned long)xdpf | BIT(0));
> +}
> +
> +static struct xdp_frame *cpsw_handle_to_xdpf(void *handle)
> +{
> + return (struct xdp_frame *)((unsigned long)handle & ~BIT(0));
> +}
> +
> +struct __aligned(sizeof(long)) cpsw_meta_xdp {
> + struct net_device *ndev;
> + int ch;
> +};
> +
> int cpsw_tx_handler(void *token, int len, int status)
> {
> + struct cpsw_meta_xdp *xmeta;
> + struct xdp_frame *xdpf;
> + struct net_device *ndev;
> struct netdev_queue *txq;
> - struct sk_buff *skb = token;
> - struct net_device *ndev = skb->dev;
> - struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
> + struct sk_buff *skb;
> + int ch;
> +
> + if (cpsw_is_xdpf_handle(token)) {
> + xdpf = cpsw_handle_to_xdpf(token);
> + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
> + ndev = xmeta->ndev;
> + ch = xmeta->ch;
> + xdp_return_frame_rx_napi(xdpf);
> + } else {
> + skb = token;
> + ndev = skb->dev;
> + ch = skb_get_queue_mapping(skb);
> + cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb);
> + dev_kfree_skb_any(skb);
> + }
>
> /* Check whether the queue is stopped due to stalled tx dma, if the
> * queue is stopped then start the queue as we have free desc for tx
> */
> - txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
> + txq = netdev_get_tx_queue(ndev, ch);
> if (unlikely(netif_tx_queue_stopped(txq)))
> netif_tx_wake_queue(txq);
>
> - cpts_tx_timestamp(cpsw->cpts, skb);
> ndev->stats.tx_packets++;
> ndev->stats.tx_bytes += len;
> - dev_kfree_skb_any(skb);
> return 0;
> }
>
> @@ -401,22 +445,229 @@ static void cpsw_rx_vlan_encap(struct sk_buff *skb)
> }
> }
>
> +static int cpsw_xdp_tx_frame_mapped(struct cpsw_priv *priv,
> + struct xdp_frame *xdpf, struct page *page)
> +{
> + struct cpsw_common *cpsw = priv->cpsw;
> + struct cpsw_meta_xdp *xmeta;
> + struct netdev_queue *txq;
> + struct cpdma_chan *txch;
> + dma_addr_t dma;
> + int ret;
> +
> + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
> + xmeta->ch = 0;
> +
> + txch = cpsw->txv[0].ch;
> + dma = (xdpf->data - (void *)xdpf) + page->dma_addr;
> + ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf), dma,
> + xdpf->len,
> + priv->emac_port + cpsw->data.dual_emac);
> + if (ret) {
> + xdp_return_frame_rx_napi(xdpf);
> + goto stop;
> + }
> +
> + /* no tx desc - stop sending us tx frames */
> + if (unlikely(!cpdma_check_free_tx_desc(txch)))
> + goto stop;
> +
> + return ret;
> +stop:
> + txq = netdev_get_tx_queue(priv->ndev, 0);
> + netif_tx_stop_queue(txq);
> +
> + /* Barrier, so that stop_queue visible to other cpus */
> + smp_mb__after_atomic();
> +
> + if (cpdma_check_free_tx_desc(txch))
> + netif_tx_wake_queue(txq);
> +
> + return ret;
> +}
> +
> +static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf)
> +{
> + struct cpsw_common *cpsw = priv->cpsw;
> + struct cpsw_meta_xdp *xmeta;
> + struct netdev_queue *txq;
> + struct cpdma_chan *txch;
> + int ret;
> +
> + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
> + if (sizeof(*xmeta) > xdpf->headroom)
> + return -EINVAL;
> +
> + xmeta->ndev = priv->ndev;
> + xmeta->ch = 0;
> +
> + txch = cpsw->txv[0].ch;
> + ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf), xdpf->data,
> + xdpf->len,
> + priv->emac_port + cpsw->data.dual_emac);
> + if (ret) {
> + xdp_return_frame_rx_napi(xdpf);
> + goto stop;
> + }
> +
> + /* no tx desc - stop sending us tx frames */
> + if (unlikely(!cpdma_check_free_tx_desc(txch)))
> + goto stop;
> +
> + return ret;
> +stop:
> + txq = netdev_get_tx_queue(priv->ndev, 0);
> + netif_tx_stop_queue(txq);
> +
> + /* Barrier, so that stop_queue visible to other cpus */
> + smp_mb__after_atomic();
> +
> + if (cpdma_check_free_tx_desc(txch))
> + netif_tx_wake_queue(txq);
> +
> + return ret;
> +}
> +
> +static int cpsw_run_xdp(struct cpsw_priv *priv, struct cpsw_vector *rxv,
> + struct xdp_buff *xdp, struct page *page)
> +{
> + struct net_device *ndev = priv->ndev;
> + struct xdp_frame *xdpf;
> + struct bpf_prog *prog;
> + int ret = 1;
> + u32 act;
> +
> + rcu_read_lock();
> +
> + prog = READ_ONCE(priv->xdp_prog);
> + if (!prog) {
> + ret = 0;
> + goto out;
> + }
> +
> + act = bpf_prog_run_xdp(prog, xdp);
> + switch (act) {
> + case XDP_PASS:
> + ret = 0;
> + break;
> + case XDP_TX:
> + xdpf = convert_to_xdp_frame(xdp);
> + if (unlikely(!xdpf))
> + xdp_return_buff(xdp);
> + else
> + cpsw_xdp_tx_frame_mapped(priv, xdpf, page);
> + break;
> + case XDP_REDIRECT:
> + if (xdp_do_redirect(ndev, xdp, prog))
> + xdp_return_buff(xdp);
> + else
> + ret = 2;
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + /* fall through */
> + case XDP_ABORTED:
> + trace_xdp_exception(ndev, prog, act);
> + /* fall through -- handle aborts by dropping packet */
> + case XDP_DROP:
> + xdp_return_buff(xdp);
> + break;
> + }
> +out:
> + rcu_read_unlock();
> + return ret;
> +}
> +
> +static unsigned int cpsw_rxbuf_total_len(unsigned int len)
> +{
> + len += CPSW_HEADROOM;
> + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> + return SKB_DATA_ALIGN(len);
> +}
> +
> +struct page_pool *cpsw_create_rx_pool(struct cpsw_common *cpsw)
> +{
> + struct page_pool_params pp_params = { 0 };
> + struct page_pool *pool;
> +
> + pp_params.order = 0;
> + pp_params.flags = PP_FLAG_DMA_MAP;
> +
> + /* set it to number of RX descriptors, but can be more */
> + pp_params.pool_size = cpdma_get_num_rx_descs(cpsw->dma);
> + pp_params.nid = NUMA_NO_NODE;
> + pp_params.dma_dir = DMA_BIDIRECTIONAL;
> + pp_params.dev = cpsw->dev;
> +
> + pool = page_pool_create(&pp_params);
> + if (IS_ERR(pool))
> + dev_err(cpsw->dev, "cannot create rx page pool\n");
> +
> + return pool;
> +}
> +
> +static struct page *cpsw_alloc_page(struct cpsw_common *cpsw)
> +{
> + struct page_pool *pool = cpsw->rx_page_pool;
> + struct page *page, *prev_page = NULL;
> + int try = pool->p.pool_size << 2;
> + int start_free = 0, ret;
> +
> + do {
> + page = page_pool_dev_alloc_pages(pool);
> + if (!page)
> + return NULL;
> +
> + /* if netstack has page_pool recycling remove the rest */
> + if (page_ref_count(page) == 1)
> + break;
> +
> + /* start free pages in use, shouldn't happen */
> + if (prev_page == page || start_free) {
> + /* dma unmap/puts page if rfcnt != 1 */
> + page_pool_recycle_direct(pool, page);
> + start_free = 1;
> + continue;
> + }
> +
> + /* if refcnt > 1, page has been holding by netstack, it's pity,
> + * so put it to the ring to be consumed later when fast cash is
s/cash/cache
> + * empty. If ring is full then free page by recycling as above.
> + */
> + ret = ptr_ring_produce(&pool->ring, page);
> + if (ret) {
> + page_pool_recycle_direct(pool, page);
> + continue;
> + }
Although this should be fine since this part won't be called during the driver
init, i think i'd prefer unmapping the buffer and let the network stack free it,
instead of pushing it for recycling. The occurence should be pretty low, so
allocating a buffer every once in a while shouldn't have a noticeable
performance impact
> +
> + if (!prev_page)
> + prev_page = page;
> + } while (try--);
> +
> + return page;
> +}
> +
> static int cpsw_rx_handler(void *token, int len, int status)
> {
> - struct cpdma_chan *ch;
> - struct sk_buff *skb = token;
> - struct sk_buff *new_skb;
> - struct net_device *ndev = skb->dev;
> - int ret = 0, port;
> - struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
> + struct page *new_page, *page = token;
> + void *pa = page_address(page);
> + struct cpsw_meta_xdp *xmeta = pa + CPSW_XMETA_OFFSET;
> + struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev);
> + int pkt_size = cpsw->rx_packet_max;
> + int ret = 0, port, ch = xmeta->ch;
> + struct page_pool *pool = cpsw->rx_page_pool;
> + int headroom = CPSW_HEADROOM;
> + struct net_device *ndev = xmeta->ndev;
> + int flush = 0;
> struct cpsw_priv *priv;
> + struct sk_buff *skb;
> + struct xdp_buff xdp;
> + dma_addr_t dma;
>
> if (cpsw->data.dual_emac) {
> port = CPDMA_RX_SOURCE_PORT(status);
> - if (port) {
> + if (port)
> ndev = cpsw->slaves[--port].ndev;
> - skb->dev = ndev;
> - }
> }
>
> if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
> @@ -429,47 +680,101 @@ static int cpsw_rx_handler(void *token, int len, int status)
> * in reducing of the number of rx descriptor in
> * DMA engine, requeue skb back to cpdma.
> */
> - new_skb = skb;
> + new_page = page;
> goto requeue;
> }
>
> /* the interface is going down, skbs are purged */
> - dev_kfree_skb_any(skb);
> + page_pool_recycle_direct(pool, page);
> return 0;
> }
>
> - new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
> - if (new_skb) {
> - skb_copy_queue_mapping(new_skb, skb);
> - skb_put(skb, len);
> - if (status & CPDMA_RX_VLAN_ENCAP)
> - cpsw_rx_vlan_encap(skb);
> - priv = netdev_priv(ndev);
> - if (priv->rx_ts_enabled)
> - cpts_rx_timestamp(cpsw->cpts, skb);
> - skb->protocol = eth_type_trans(skb, ndev);
> - netif_receive_skb(skb);
> - ndev->stats.rx_bytes += len;
> - ndev->stats.rx_packets++;
> - kmemleak_not_leak(new_skb);
> - } else {
> + new_page = cpsw_alloc_page(cpsw);
> + if (unlikely(!new_page)) {
> + new_page = page;
> ndev->stats.rx_dropped++;
> - new_skb = skb;
> + goto requeue;
> }
>
> + priv = netdev_priv(ndev);
> + if (priv->xdp_prog) {
> + if (status & CPDMA_RX_VLAN_ENCAP) {
> + xdp.data = (void *)pa + CPSW_HEADROOM +
> + CPSW_RX_VLAN_ENCAP_HDR_SIZE;
> + xdp.data_end = xdp.data + len -
> + CPSW_RX_VLAN_ENCAP_HDR_SIZE;
> + } else {
> + xdp.data = (void *)pa + CPSW_HEADROOM;
> + xdp.data_end = xdp.data + len;
> + }
> +
> + xdp_set_data_meta_invalid(&xdp);
> +
> + xdp.data_hard_start = pa;
> + xdp.rxq = &priv->xdp_rxq[ch];
> +
> + ret = cpsw_run_xdp(priv, &cpsw->rxv[ch], &xdp, page);
> + if (ret) {
> + if (ret == 2)
> + flush = 1;
> +
> + goto requeue;
> + }
> +
> + /* XDP prog might have changed packet data and boundaries */
> + len = xdp.data_end - xdp.data;
> + headroom = xdp.data - xdp.data_hard_start;
> + }
> +
> + /* Build skb and pass it to netstack if XDP off or XDP prog
> + * returned XDP_PASS
> + */
> + skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size));
> + if (!skb) {
> + ndev->stats.rx_dropped++;
> + page_pool_recycle_direct(pool, page);
> + goto requeue;
> + }
> +
> + skb_reserve(skb, headroom);
> + skb_put(skb, len);
> + skb->dev = ndev;
> + if (status & CPDMA_RX_VLAN_ENCAP)
> + cpsw_rx_vlan_encap(skb);
> + if (priv->rx_ts_enabled)
> + cpts_rx_timestamp(cpsw->cpts, skb);
> + skb->protocol = eth_type_trans(skb, ndev);
> +
> + /* recycle page before increasing refcounter, it allows to hold page in
> + * page pool cache improving allocation time, see cpsw_alloc_page().
> + */
> + page_pool_recycle_direct(pool, page);
> +
> + /* remove once ordinary netstack has page_pool recycling */
> + page_ref_inc(page);
> +
> + netif_receive_skb(skb);
> +
> + ndev->stats.rx_bytes += len;
> + ndev->stats.rx_packets++;
> +
> requeue:
> if (netif_dormant(ndev)) {
> - dev_kfree_skb_any(new_skb);
> - return 0;
> + page_pool_recycle_direct(pool, new_page);
> + return flush;
> }
>
> - ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch;
> - ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
> - skb_tailroom(new_skb), 0);
> + xmeta = page_address(new_page) + CPSW_XMETA_OFFSET;
> + xmeta->ndev = ndev;
> + xmeta->ch = ch;
> +
> + dma = new_page->dma_addr + CPSW_HEADROOM;
> + ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
> + pkt_size, 0);
> if (WARN_ON(ret < 0))
> - dev_kfree_skb_any(new_skb);
> + page_pool_recycle_direct(pool, new_page);
>
> - return 0;
> + return flush;
> }
>
> void cpsw_split_res(struct cpsw_common *cpsw)
> @@ -644,7 +949,7 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
> static int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget)
> {
> u32 ch_map;
> - int num_rx, cur_budget, ch;
> + int num_rx, cur_budget, ch, flush;
> struct cpsw_common *cpsw = napi_to_cpsw(napi_rx);
> struct cpsw_vector *rxv;
>
> @@ -660,8 +965,12 @@ static int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget)
> else
> cur_budget = rxv->budget;
>
> - cpdma_chan_process(rxv->ch, &cur_budget);
> + flush = cpdma_chan_process(rxv->ch, &cur_budget);
> num_rx += cur_budget;
> +
> + if (flush)
> + xdp_do_flush_map();
> +
> if (num_rx >= budget)
> break;
> }
> @@ -677,10 +986,15 @@ static int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget)
> static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
> {
> struct cpsw_common *cpsw = napi_to_cpsw(napi_rx);
> - int num_rx;
> + struct cpsw_vector *rxv;
> + int num_rx, flush;
>
> num_rx = budget;
> - cpdma_chan_process(cpsw->rxv[0].ch, &num_rx);
> + rxv = &cpsw->rxv[0];
> + flush = cpdma_chan_process(rxv->ch, &num_rx);
> + if (flush)
> + xdp_do_flush_map();
> +
> if (num_rx < budget) {
> napi_complete_done(napi_rx, num_rx);
> writel(0xff, &cpsw->wr_regs->rx_en);
> @@ -1042,33 +1356,38 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
> int cpsw_fill_rx_channels(struct cpsw_priv *priv)
> {
> struct cpsw_common *cpsw = priv->cpsw;
> - struct sk_buff *skb;
> + struct cpsw_meta_xdp *xmeta;
> + struct page_pool *pool;
> + struct page *page;
> int ch_buf_num;
> int ch, i, ret;
> + dma_addr_t dma;
>
> + pool = cpsw->rx_page_pool;
> for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
> ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
> for (i = 0; i < ch_buf_num; i++) {
> - skb = __netdev_alloc_skb_ip_align(priv->ndev,
> - cpsw->rx_packet_max,
> - GFP_KERNEL);
> - if (!skb) {
> - cpsw_err(priv, ifup, "cannot allocate skb\n");
> + page = cpsw_alloc_page(cpsw);
> + if (!page) {
> + cpsw_err(priv, ifup, "allocate rx page err\n");
> return -ENOMEM;
> }
>
> - skb_set_queue_mapping(skb, ch);
> - ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb,
> - skb->data, skb_tailroom(skb),
> - 0);
> + xmeta = page_address(page) + CPSW_XMETA_OFFSET;
> + xmeta->ndev = priv->ndev;
> + xmeta->ch = ch;
> +
> + dma = page->dma_addr + CPSW_HEADROOM;
> + ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, page,
> + dma, cpsw->rx_packet_max,
> + 0);
> if (ret < 0) {
> cpsw_err(priv, ifup,
> "cannot submit skb to channel %d rx, error %d\n",
> ch, ret);
> - kfree_skb(skb);
> + page_pool_recycle_direct(pool, page);
> return ret;
> }
> - kmemleak_not_leak(skb);
> }
>
> cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n",
> @@ -2011,6 +2330,64 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
> }
> }
>
> +static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf)
> +{
> + struct bpf_prog *prog = bpf->prog;
> +
> + if (!priv->xdpi.prog && !prog)
> + return 0;
> +
> + if (!xdp_attachment_flags_ok(&priv->xdpi, bpf))
> + return -EBUSY;
> +
> + WRITE_ONCE(priv->xdp_prog, prog);
> +
> + xdp_attachment_setup(&priv->xdpi, bpf);
> +
> + return 0;
> +}
> +
> +static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
> +{
> + struct cpsw_priv *priv = netdev_priv(ndev);
> +
> + switch (bpf->command) {
> + case XDP_SETUP_PROG:
> + return cpsw_xdp_prog_setup(priv, bpf);
> +
> + case XDP_QUERY_PROG:
> + return xdp_attachment_query(&priv->xdpi, bpf);
> +
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
> + struct xdp_frame **frames, u32 flags)
> +{
> + struct cpsw_priv *priv = netdev_priv(ndev);
> + struct xdp_frame *xdpf;
> + int i, drops = 0;
> +
> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> + return -EINVAL;
> +
> + for (i = 0; i < n; i++) {
> + xdpf = frames[i];
> + if (xdpf->len < CPSW_MIN_PACKET_SIZE) {
> + xdp_return_frame_rx_napi(xdpf);
> + drops++;
> + continue;
> + }
> +
> + if (cpsw_xdp_tx_frame(priv, xdpf))
> + drops++;
> + }
> +
> + return n - drops;
> +}
> +
> #ifdef CONFIG_NET_POLL_CONTROLLER
> static void cpsw_ndo_poll_controller(struct net_device *ndev)
> {
> @@ -2039,6 +2416,8 @@ static const struct net_device_ops cpsw_netdev_ops = {
> .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid,
> .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid,
> .ndo_setup_tc = cpsw_ndo_setup_tc,
> + .ndo_bpf = cpsw_ndo_bpf,
> + .ndo_xdp_xmit = cpsw_ndo_xdp_xmit,
> };
>
> static void cpsw_get_drvinfo(struct net_device *ndev,
> @@ -2335,11 +2714,24 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
> ndev->netdev_ops = &cpsw_netdev_ops;
> ndev->ethtool_ops = &cpsw_ethtool_ops;
>
> + ret = xdp_rxq_info_reg(priv_sl2->xdp_rxq, ndev, 0);
> + if (ret)
> + return ret;
> +
> + ret = xdp_rxq_info_reg_mem_model(priv_sl2->xdp_rxq, MEM_TYPE_PAGE_POOL,
> + cpsw->rx_page_pool);
> + if (ret) {
> + xdp_rxq_info_unreg(priv_sl2->xdp_rxq);
> + return ret;
> + }
> +
> /* register the network device */
> SET_NETDEV_DEV(ndev, cpsw->dev);
> ret = register_netdev(ndev);
> - if (ret)
> + if (ret) {
> dev_err(cpsw->dev, "cpsw: error registering net device\n");
> + xdp_rxq_info_unreg(priv_sl2->xdp_rxq);
> + }
>
> return ret;
> }
> @@ -2457,19 +2849,25 @@ static int cpsw_probe(struct platform_device *pdev)
> if (ret)
> goto clean_dt_ret;
>
> + cpsw->rx_page_pool = cpsw_create_rx_pool(cpsw);
> + if (IS_ERR(cpsw->rx_page_pool)) {
> + ret = PTR_ERR(cpsw->rx_page_pool);
> + goto clean_cpts;
> + }
> +
> ch = cpsw->quirk_irq ? 0 : 7;
> cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
> if (IS_ERR(cpsw->txv[0].ch)) {
> dev_err(dev, "error initializing tx dma channel\n");
> ret = PTR_ERR(cpsw->txv[0].ch);
> - goto clean_cpts;
> + goto clean_pool;
> }
>
> cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
> if (IS_ERR(cpsw->rxv[0].ch)) {
> dev_err(dev, "error initializing rx dma channel\n");
> ret = PTR_ERR(cpsw->rxv[0].ch);
> - goto clean_cpts;
> + goto clean_pool;
> }
> cpsw_split_res(cpsw);
>
> @@ -2478,7 +2876,7 @@ static int cpsw_probe(struct platform_device *pdev)
> CPSW_MAX_QUEUES, CPSW_MAX_QUEUES);
> if (!ndev) {
> dev_err(dev, "error allocating net_device\n");
> - goto clean_cpts;
> + goto clean_pool;
> }
>
> platform_set_drvdata(pdev, ndev);
> @@ -2499,6 +2897,15 @@ static int cpsw_probe(struct platform_device *pdev)
>
> memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
>
> + ret = xdp_rxq_info_reg(priv->xdp_rxq, ndev, 0);
> + if (ret)
> + goto clean_pool;
> +
> + ret = xdp_rxq_info_reg_mem_model(priv->xdp_rxq, MEM_TYPE_PAGE_POOL,
> + cpsw->rx_page_pool);
> + if (ret)
> + goto clean_rxq_info;
> +
> cpsw->slaves[0].ndev = ndev;
>
> ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
> @@ -2518,7 +2925,7 @@ static int cpsw_probe(struct platform_device *pdev)
> if (ret) {
> dev_err(dev, "error registering net device\n");
> ret = -ENODEV;
> - goto clean_cpts;
> + goto clean_rxq_info;
> }
>
> if (cpsw->data.dual_emac) {
> @@ -2561,6 +2968,10 @@ static int cpsw_probe(struct platform_device *pdev)
>
> clean_unregister_netdev_ret:
> unregister_netdev(ndev);
> +clean_rxq_info:
> + xdp_rxq_info_unreg(priv->xdp_rxq);
> +clean_pool:
> + page_pool_destroy(cpsw->rx_page_pool);
> clean_cpts:
> cpts_release(cpsw->cpts);
> cpdma_ctlr_destroy(cpsw->dma);
> @@ -2572,11 +2983,26 @@ static int cpsw_probe(struct platform_device *pdev)
> return ret;
> }
>
> +void cpsw_xdp_rxq_unreg(struct cpsw_common *cpsw, int ch)
> +{
> + struct cpsw_slave *slave;
> + struct cpsw_priv *priv;
> + int i;
> +
> + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
> + if (!slave->ndev)
> + continue;
> +
> + priv = netdev_priv(slave->ndev);
> + xdp_rxq_info_unreg(&priv->xdp_rxq[ch]);
> + }
> +}
> +
> static int cpsw_remove(struct platform_device *pdev)
> {
> struct net_device *ndev = platform_get_drvdata(pdev);
> struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
> - int ret;
> + int i, ret;
>
> ret = pm_runtime_get_sync(&pdev->dev);
> if (ret < 0) {
> @@ -2590,6 +3016,11 @@ static int cpsw_remove(struct platform_device *pdev)
>
> cpts_release(cpsw->cpts);
> cpdma_ctlr_destroy(cpsw->dma);
> +
> + for (i = 0; i < cpsw->rx_ch_num; i++)
> + cpsw_xdp_rxq_unreg(cpsw, i);
> +
> + page_pool_destroy(cpsw->rx_page_pool);
> cpsw_remove_dt(pdev);
> pm_runtime_put_sync(&pdev->dev);
> pm_runtime_disable(&pdev->dev);
> diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
> index 0c08ec91635a..dbe4bc5513c6 100644
> --- a/drivers/net/ethernet/ti/cpsw_ethtool.c
> +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
> @@ -14,6 +14,7 @@
> #include <linux/phy.h>
> #include <linux/pm_runtime.h>
> #include <linux/skbuff.h>
> +#include <net/page_pool.h>
>
> #include "cpsw.h"
> #include "cpts.h"
> @@ -531,6 +532,42 @@ static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
> return 0;
> }
>
> +static int cpsw_xdp_rxq_reg(struct cpsw_common *cpsw, int ch)
> +{
> + struct cpsw_slave *slave;
> + struct cpsw_priv *priv;
> + int i, ret;
> +
> + /* As channels are common for both ports sharing same queues, xdp_rxq
> + * information also becomes shared and used by every packet on this
> + * channel. But exch xdp_rxq holds link on netdev, which by the theory
> + * can have different memory model and so, network device must hold it's
> + * own set of rxq and thus both netdevs should be prepared
> + */
> + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
> + if (!slave->ndev)
> + continue;
> +
> + priv = netdev_priv(slave->ndev);
> +
> + ret = xdp_rxq_info_reg(&priv->xdp_rxq[ch], priv->ndev, ch);
> + if (ret)
> + goto err;
> +
> + ret = xdp_rxq_info_reg_mem_model(&priv->xdp_rxq[ch],
> + MEM_TYPE_PAGE_POOL,
> + cpsw->rx_page_pool);
> + if (ret)
> + goto err;
> + }
> +
> + return ret;
> +
> +err:
> + cpsw_xdp_rxq_unreg(cpsw, ch);
> + return ret;
> +}
> +
> static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx,
> cpdma_handler_fn rx_handler)
> {
> @@ -562,6 +599,11 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx,
> if (!vec[*ch].ch)
> return -EINVAL;
>
> + if (rx && cpsw_xdp_rxq_reg(cpsw, *ch)) {
> + cpdma_chan_destroy(vec[*ch].ch);
> + return -EINVAL;
> + }
> +
> cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
> (rx ? "rx" : "tx"));
> (*ch)++;
> @@ -570,6 +612,9 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx,
> while (*ch > ch_num) {
> (*ch)--;
>
> + if (rx)
> + cpsw_xdp_rxq_unreg(cpsw, *ch);
> +
> ret = cpdma_chan_destroy(vec[*ch].ch);
> if (ret)
> return ret;
> @@ -654,6 +699,7 @@ int cpsw_set_ringparam(struct net_device *ndev,
> {
> struct cpsw_priv *priv = netdev_priv(ndev);
> struct cpsw_common *cpsw = priv->cpsw;
> + struct page_pool *pool;
> int ret;
>
> /* ignore ering->tx_pending - only rx_pending adjustment is supported */
> @@ -666,6 +712,10 @@ int cpsw_set_ringparam(struct net_device *ndev,
> if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma))
> return 0;
>
> + pool = cpsw_create_rx_pool(cpsw);
> + if (IS_ERR(pool))
> + return PTR_ERR(pool);
> +
> cpsw_suspend_data_pass(ndev);
>
> cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
> @@ -673,6 +723,9 @@ int cpsw_set_ringparam(struct net_device *ndev,
> if (cpsw->usage_count)
> cpdma_chan_split_pool(cpsw->dma);
>
> + page_pool_destroy(cpsw->rx_page_pool);
> + cpsw->rx_page_pool = pool;
> +
> ret = cpsw_resume_data_pass(ndev);
> if (!ret)
> return 0;
> diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
> index 2ecb3af59fe9..884ce6343a7d 100644
> --- a/drivers/net/ethernet/ti/cpsw_priv.h
> +++ b/drivers/net/ethernet/ti/cpsw_priv.h
> @@ -346,6 +346,7 @@ struct cpsw_common {
> int rx_ch_num, tx_ch_num;
> int speed;
> int usage_count;
> + struct page_pool *rx_page_pool;
> };
>
> struct cpsw_priv {
> @@ -360,6 +361,10 @@ struct cpsw_priv {
> int shp_cfg_speed;
> int tx_ts_enabled;
> int rx_ts_enabled;
> + struct bpf_prog *xdp_prog;
> + struct xdp_rxq_info xdp_rxq[CPSW_MAX_QUEUES];
> + struct xdp_attachment_info xdpi;
> +
> u32 emac_port;
> struct cpsw_common *cpsw;
> };
> @@ -391,6 +396,8 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv);
> void cpsw_intr_enable(struct cpsw_common *cpsw);
> void cpsw_intr_disable(struct cpsw_common *cpsw);
> int cpsw_tx_handler(void *token, int len, int status);
> +void cpsw_xdp_rxq_unreg(struct cpsw_common *cpsw, int ch);
> +struct page_pool *cpsw_create_rx_pool(struct cpsw_common *cpsw);
>
> /* ethtool */
> u32 cpsw_get_msglevel(struct net_device *ndev);
> --
> 2.17.1
>
Powered by blists - more mailing lists