lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAHS8izNnNJZsEXwZ07zhpn8AjxhGGcm9vyt8uFos1rVvn66qsQ@mail.gmail.com>
Date: Wed, 5 Mar 2025 16:13:32 -0800
From: Mina Almasry <almasrymina@...gle.com>
To: Alexander Lobakin <aleksander.lobakin@...el.com>
Cc: intel-wired-lan@...ts.osuosl.org, Michal Kubiak <michal.kubiak@...el.com>, 
	Maciej Fijalkowski <maciej.fijalkowski@...el.com>, Tony Nguyen <anthony.l.nguyen@...el.com>, 
	Przemek Kitszel <przemyslaw.kitszel@...el.com>, Andrew Lunn <andrew+netdev@...n.ch>, 
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>, 
	Daniel Borkmann <daniel@...earbox.net>, Jesper Dangaard Brouer <hawk@...nel.org>, 
	John Fastabend <john.fastabend@...il.com>, Simon Horman <horms@...nel.org>, bpf@...r.kernel.org, 
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next 01/16] libeth: convert to netmem

On Wed, Mar 5, 2025 at 8:23 AM Alexander Lobakin
<aleksander.lobakin@...el.com> wrote:
>
> Back when the libeth Rx core was initially written, devmem was a draft
> and netmem_ref didn't exist in the mainline. Now that it's here, make
> libeth MP-agnostic before introducing any new code or any new library
> users.
> When it's known that the created PP/FQ is for header buffers, use faster
> "unsafe" underscored netmem <--> virt accessors as netmem_is_net_iov()
> is always false in that case, but consumes some cycles (bit test +
> true branch).
> Misc: replace explicit EXPORT_SYMBOL_NS_GPL("NS") with
> DEFAULT_SYMBOL_NAMESPACE.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> ---
>  include/net/libeth/rx.h                       | 22 +++++++------
>  drivers/net/ethernet/intel/iavf/iavf_txrx.c   | 14 ++++----
>  .../ethernet/intel/idpf/idpf_singleq_txrx.c   |  2 +-
>  drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 33 +++++++++++--------
>  drivers/net/ethernet/intel/libeth/rx.c        | 20 ++++++-----
>  5 files changed, 51 insertions(+), 40 deletions(-)
>
> diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
> index ab05024be518..7d5dc58984b1 100644
> --- a/include/net/libeth/rx.h
> +++ b/include/net/libeth/rx.h
> @@ -1,5 +1,5 @@
>  /* SPDX-License-Identifier: GPL-2.0-only */
> -/* Copyright (C) 2024 Intel Corporation */
> +/* Copyright (C) 2024-2025 Intel Corporation */
>
>  #ifndef __LIBETH_RX_H
>  #define __LIBETH_RX_H
> @@ -31,7 +31,7 @@
>
>  /**
>   * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
> - * @page: page holding the buffer
> + * @netmem: network memory reference holding the buffer
>   * @offset: offset from the page start (to the headroom)
>   * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
>   *
> @@ -40,7 +40,7 @@
>   * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
>   */
>  struct libeth_fqe {
> -       struct page             *page;
> +       netmem_ref              netmem;
>         u32                     offset;
>         u32                     truesize;
>  } __aligned_largest;
> @@ -102,15 +102,16 @@ static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
>         struct libeth_fqe *buf = &fq->fqes[i];
>
>         buf->truesize = fq->truesize;
> -       buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
> -       if (unlikely(!buf->page))
> +       buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
> +                                                &buf->truesize);
> +       if (unlikely(!buf->netmem))
>                 return DMA_MAPPING_ERROR;
>
> -       return page_pool_get_dma_addr(buf->page) + buf->offset +
> +       return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
>                fq->pp->p.offset;
>  }
>
> -void libeth_rx_recycle_slow(struct page *page);
> +void libeth_rx_recycle_slow(netmem_ref netmem);
>
>  /**
>   * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
> @@ -126,18 +127,19 @@ void libeth_rx_recycle_slow(struct page *page);
>  static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
>                                           u32 len)
>  {
> -       struct page *page = fqe->page;
> +       netmem_ref netmem = fqe->netmem;
>
>         /* Very rare, but possible case. The most common reason:
>          * the last fragment contained FCS only, which was then
>          * stripped by the HW.
>          */
>         if (unlikely(!len)) {
> -               libeth_rx_recycle_slow(page);
> +               libeth_rx_recycle_slow(netmem);

I think before this patch this would have expanded to:

page_pool_put_full_page(pool, page, true);

But now I think it expands to:

page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);

Is the switch from true to false intentional? Is this a slow path so
it doesn't matter?

>                 return false;
>         }
>
> -       page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
> +       page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
> +                                         fqe->offset, len);
>
>         return true;
>  }
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> index 422312b8b54a..35d353d38129 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> @@ -723,7 +723,7 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
>         for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) {
>                 const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i];
>
> -               page_pool_put_full_page(rx_ring->pp, rx_fqes->page, false);
> +               libeth_rx_recycle_slow(rx_fqes->netmem);
>
>                 if (unlikely(++i == rx_ring->count))
>                         i = 0;
> @@ -1197,10 +1197,11 @@ static void iavf_add_rx_frag(struct sk_buff *skb,
>                              const struct libeth_fqe *rx_buffer,
>                              unsigned int size)
>  {
> -       u32 hr = rx_buffer->page->pp->p.offset;
> +       u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset;
>
> -       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
> -                       rx_buffer->offset + hr, size, rx_buffer->truesize);
> +       skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags,
> +                              rx_buffer->netmem, rx_buffer->offset + hr,
> +                              size, rx_buffer->truesize);
>  }
>
>  /**
> @@ -1214,12 +1215,13 @@ static void iavf_add_rx_frag(struct sk_buff *skb,
>  static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
>                                       unsigned int size)
>  {
> -       u32 hr = rx_buffer->page->pp->p.offset;
> +       struct page *buf_page = __netmem_to_page(rx_buffer->netmem);
> +       u32 hr = buf_page->pp->p.offset;
>         struct sk_buff *skb;
>         void *va;
>
>         /* prefetch first cache line of first page */
> -       va = page_address(rx_buffer->page) + rx_buffer->offset;
> +       va = page_address(buf_page) + rx_buffer->offset;
>         net_prefetch(va + hr);
>
>         /* build an skb around the page buffer */
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> index eae1b6f474e6..aeb2ca5f5a0a 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> @@ -1009,7 +1009,7 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
>                         break;
>
>  skip_data:
> -               rx_buf->page = NULL;
> +               rx_buf->netmem = 0;
>
>                 IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
>                 cleaned_count++;
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> index bdf52cef3891..6254806c2072 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> @@ -382,12 +382,12 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
>   */
>  static void idpf_rx_page_rel(struct libeth_fqe *rx_buf)
>  {
> -       if (unlikely(!rx_buf->page))
> +       if (unlikely(!rx_buf->netmem))
>                 return;
>
> -       page_pool_put_full_page(rx_buf->page->pp, rx_buf->page, false);
> +       libeth_rx_recycle_slow(rx_buf->netmem);
>
> -       rx_buf->page = NULL;
> +       rx_buf->netmem = 0;
>         rx_buf->offset = 0;
>  }
>
> @@ -3096,10 +3096,10 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
>  void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
>                       unsigned int size)
>  {
> -       u32 hr = rx_buf->page->pp->p.offset;
> +       u32 hr = netmem_get_pp(rx_buf->netmem)->p.offset;
>
> -       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
> -                       rx_buf->offset + hr, size, rx_buf->truesize);
> +       skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem,
> +                              rx_buf->offset + hr, size, rx_buf->truesize);
>  }
>
>  /**
> @@ -3122,16 +3122,20 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr,
>                              struct libeth_fqe *buf, u32 data_len)
>  {
>         u32 copy = data_len <= L1_CACHE_BYTES ? data_len : ETH_HLEN;
> +       struct page *hdr_page, *buf_page;
>         const void *src;
>         void *dst;
>
> -       if (!libeth_rx_sync_for_cpu(buf, copy))
> +       if (unlikely(netmem_is_net_iov(buf->netmem)) ||
> +           !libeth_rx_sync_for_cpu(buf, copy))
>                 return 0;
>

I could not immediately understand why you need a netmem_is_net_iov
check here. libeth_rx_sync_for_cpu will delegate to
page_pool_dma_sync_netmem_for_cpu which should do the right thing
regardless of whether the netmem is a page or net_iov, right? Is this
to save some cycles?

--
Thanks,
Mina

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ