lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 29 Sep 2022 13:11:03 +0000
From:   Shenwei Wang <shenwei.wang@....com>
To:     Jesper Dangaard Brouer <jbrouer@...hat.com>,
        Joakim Zhang <qiangqing.zhang@....com>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>
CC:     "brouer@...hat.com" <brouer@...hat.com>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        John Fastabend <john.fastabend@...il.com>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "imx@...ts.linux.dev" <imx@...ts.linux.dev>
Subject: RE: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support



> -----Original Message-----
> From: Jesper Dangaard Brouer <jbrouer@...hat.com>
> Sent: Thursday, September 29, 2022 5:17 AM
> To: Shenwei Wang <shenwei.wang@....com>; Joakim Zhang
> <qiangqing.zhang@....com>; David S. Miller <davem@...emloft.net>; Eric
> Dumazet <edumazet@...gle.com>; Jakub Kicinski <kuba@...nel.org>; Paolo
> Abeni <pabeni@...hat.com>
> > diff --git a/drivers/net/ethernet/freescale/fec.h
> > b/drivers/net/ethernet/freescale/fec.h
> > index b0100fe3c9e4..f7531503aa95 100644
> > --- a/drivers/net/ethernet/freescale/fec.h
> > +++ b/drivers/net/ethernet/freescale/fec.h
> > @@ -346,8 +346,10 @@ struct bufdesc_ex {
> >    * the skbuffer directly.
> >    */
> >
> > +#define FEC_ENET_XDP_HEADROOM        (512) /* XDP_PACKET_HEADROOM
> + NET_IP_ALIGN) */
> 
> Why the large headroom?
> 

The accurate value here should be "XDP_PACKET_HEADROOM (256) + NET_IP_ALIGN" which then aligns with 64 bytes. So 256 + 64 should be enough here.
 
> > +
> >   #define FEC_ENET_RX_PAGES   256
> > -#define FEC_ENET_RX_FRSIZE   2048
> > +#define FEC_ENET_RX_FRSIZE   (PAGE_SIZE - FEC_ENET_XDP_HEADROOM)
> 
> This FEC_ENET_RX_FRSIZE is likely wrong, because you also need to reserve 320
> bytes at the end for struct skb_shared_info.
> (320 calculated as SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> 
> >   #define FEC_ENET_RX_FRPPG   (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
> >   #define RX_RING_SIZE                (FEC_ENET_RX_FRPPG *
> FEC_ENET_RX_PAGES)
> >   #define FEC_ENET_TX_FRSIZE  2048
> > @@ -517,6 +519,22 @@ struct bufdesc_prop {
> [...]
> 
> > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > b/drivers/net/ethernet/freescale/fec_main.c
> > index 59921218a8a4..2e30182ed770 100644
> > --- a/drivers/net/ethernet/freescale/fec_main.c
> > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > @@ -66,6 +66,8 @@
> >   #include <linux/mfd/syscon.h>
> >   #include <linux/regmap.h>
> >   #include <soc/imx/cpuidle.h>
> > +#include <linux/filter.h>
> > +#include <linux/bpf.h>
> >
> >   #include <asm/cacheflush.h>
> >
> > @@ -87,6 +89,11 @@ static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0,
> 1, 1, 1, 2, 2, 2};
> >   #define FEC_ENET_OPD_V      0xFFF0
> >   #define FEC_MDIO_PM_TIMEOUT  100 /* ms */
> >
> > +#define FEC_ENET_XDP_PASS          0
> > +#define FEC_ENET_XDP_CONSUMED      BIT(0)
> > +#define FEC_ENET_XDP_TX            BIT(1)
> > +#define FEC_ENET_XDP_REDIR         BIT(2)
> > +
> >   struct fec_devinfo {
> >       u32 quirks;
> >   };
> > @@ -422,6 +429,49 @@ fec_enet_clear_csum(struct sk_buff *skb, struct
> net_device *ndev)
> >       return 0;
> >   }
> >
> > +static int
> > +fec_enet_create_page_pool(struct fec_enet_private *fep,
> > +                       struct fec_enet_priv_rx_q *rxq, int size) {
> > +     struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
> > +     struct page_pool_params pp_params = {
> > +             .order = 0,
> > +             .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
> > +             .pool_size = size,
> > +             .nid = dev_to_node(&fep->pdev->dev),
> > +             .dev = &fep->pdev->dev,
> > +             .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
> > +             .offset = FEC_ENET_XDP_HEADROOM,
> > +             .max_len = FEC_ENET_RX_FRSIZE,
> 
> XDP BPF-prog cannot access last 320 bytes, so FEC_ENET_RX_FRSIZE is wrong
> here.
> 

So the FEC_ENET_RX_FRSIZE should subtract the sizeof(struct skb_shared_info) in the definition, right? 

> > +     };
> > +     int err;
> > +
> > +     rxq->page_pool = page_pool_create(&pp_params);
> > +     if (IS_ERR(rxq->page_pool)) {
> > +             err = PTR_ERR(rxq->page_pool);
> > +             rxq->page_pool = NULL;
> > +             return err;
> > +     }
> > +
> > +     err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0);
> > +     if (err < 0)
> > +             goto err_free_pp;
> > +
> > +     err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
> MEM_TYPE_PAGE_POOL,
> > +                                      rxq->page_pool);
> > +     if (err)
> > +             goto err_unregister_rxq;
> > +
> > +     return 0;
> > +
> > +err_unregister_rxq:
> > +     xdp_rxq_info_unreg(&rxq->xdp_rxq);
> > +err_free_pp:
> > +     page_pool_destroy(rxq->page_pool);
> > +     rxq->page_pool = NULL;
> > +     return err;
> > +}
> > +
> >   static struct bufdesc *
> >   fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
> >                            struct sk_buff *skb, @@ -1285,7 +1335,6 @@
> > fec_stop(struct net_device *ndev)
> >       }
> >   }
> >
> > -
> >   static void
> >   fec_timeout(struct net_device *ndev, unsigned int txqueue)
> >   {
> > @@ -1450,7 +1499,7 @@ static void fec_enet_tx(struct net_device *ndev)
> >               fec_enet_tx_queue(ndev, i);
> >   }
> >
> > -static int
> > +static int __maybe_unused
> >   fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct
> sk_buff *skb)
> >   {
> >       struct  fec_enet_private *fep = netdev_priv(ndev); @@ -1470,8
> > +1519,9 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc
> *bdp, struct sk_buff
> >       return 0;
> >   }
> >
> > -static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
> > -                            struct bufdesc *bdp, u32 length, bool swap)
> > +static bool __maybe_unused
> > +fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
> > +                struct bufdesc *bdp, u32 length, bool swap)
> >   {
> >       struct  fec_enet_private *fep = netdev_priv(ndev);
> >       struct sk_buff *new_skb;
> > @@ -1496,6 +1546,78 @@ static bool fec_enet_copybreak(struct net_device
> *ndev, struct sk_buff **skb,
> >       return true;
> >   }
> >
> > +static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> > +                             struct bufdesc *bdp, int index) {
> > +     struct page *new_page;
> > +     dma_addr_t phys_addr;
> > +
> > +     new_page = page_pool_dev_alloc_pages(rxq->page_pool);
> > +     WARN_ON(!new_page);
> > +     rxq->rx_skb_info[index].page = new_page;
> > +
> > +     rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
> > +     phys_addr = page_pool_get_dma_addr(new_page) +
> FEC_ENET_XDP_HEADROOM;
> > +     bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); }
> > +
> > +static u32
> > +fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
> > +              struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq,
> > +int index) {
> > +     unsigned int sync, len = xdp->data_end - xdp->data;
> > +     u32 ret = FEC_ENET_XDP_PASS;
> > +     struct page *page;
> > +     int err;
> > +     u32 act;
> > +
> > +     act = bpf_prog_run_xdp(prog, xdp);
> > +
> > +     /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> > +     sync = xdp->data_end - xdp->data_hard_start -
> FEC_ENET_XDP_HEADROOM;
> > +     sync = max(sync, len);
> > +
> > +     switch (act) {
> > +     case XDP_PASS:
> > +             rxq->stats.xdp_pass++;
> > +             ret = FEC_ENET_XDP_PASS;
> > +             break;
> > +
> > +     case XDP_TX:
> > +             rxq->stats.xdp_tx++;
> > +             bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
> > +             fallthrough;
> 
> This fallthrough looks wrong. The next xdp_do_redirect() call will pickup left-
> overs in per CPU bpf_redirect_info.
> 

So before the XDP_TX is implemented, this part of codes should reside below the XDP_REDIRECT case?

> > +
> > +     case XDP_REDIRECT:
> > +             err = xdp_do_redirect(fep->netdev, xdp, prog);
> > +             rxq->stats.xdp_redirect++;
> > -                     dma_unmap_single(&fep->pdev->dev,
...

> > -                                      fec32_to_cpu(bdp->cbd_bufaddr),
> > -                                      FEC_ENET_RX_FRSIZE - fep->rx_align,
> > -                                      DMA_FROM_DEVICE);
> > -             }
> > -
> > -             prefetch(skb->data - NET_IP_ALIGN);
> > +             skb = build_skb(page_address(page), FEC_ENET_RX_FRSIZE);
> 
> This looks wrong, I think FEC_ENET_RX_FRSIZE should be replaced by PAGE_SIZE.
> As build_skb() does:
> 
>   size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> 

Agree. As the current FRSIZE definition did not subtract the sizeof(struct skb_shared_info), I happened to not see the problem during the testing.

Thanks,
Shenwei

> > +             skb_reserve(skb, FEC_ENET_XDP_HEADROOM);
> 
> The skb_reserve looks correct.
> 
> >               skb_put(skb, pkt_len - 4);
> >               data = skb->data;
> > +             page_pool_release_page(rxq->page_pool, page);
> 
> Today page_pool have SKB recycle support (you might have looked at drivers
> that didn't utilize this yet), thus you don't need to release the page
> (page_pool_release_page) here.  Instead you could simply mark the SKB for
> recycling, unless driver does some page refcnt tricks I didn't notice.
> 
>   skb_mark_for_recycle(skb);
> 
> 
> > -             if (!is_copybreak && need_swap)
> > +             if (need_swap)
> >                       swap_buffer(data, pkt_len);
> >
> >   #if !defined(CONFIG_M5272)
> > @@ -1649,16 +1781,6 @@ fec_enet_rx_queue(struct net_device *ndev, int
> > budget, u16 queue_id)
> [...]

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ