Message-ID: <PH3PPF67C992ECC2AD31E5DB372B564E20A9106A@PH3PPF67C992ECC.namprd11.prod.outlook.com>
Date: Tue, 2 Sep 2025 06:06:02 +0000
From: "Singh, PriyaX" <priyax.singh@...el.com>
To: "intel-wired-lan-bounces@...osl.org" <intel-wired-lan-bounces@...osl.org>
CC: "Fijalkowski, Maciej" <maciej.fijalkowski@...el.com>, "Lobakin,
Aleksander" <aleksander.lobakin@...el.com>, "Keller, Jacob E"
<jacob.e.keller@...el.com>, "Zaremba, Larysa" <larysa.zaremba@...el.com>,
"(Meetup2) MTR-FM1-AVLAB1" <fm1avlab1@...el.com>, "Kitszel, Przemyslaw"
<przemyslaw.kitszel@...el.com>, "Nguyen, Anthony L"
<anthony.l.nguyen@...el.com>, "Kubiak, Michal" <michal.kubiak@...el.com>,
"Buvaneswaran, Sujai" <sujai.buvaneswaran@...el.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: RE: [Intel-wired-lan] [PATCH iwl-next v2 2/3] ice: drop page
splitting and recycling
> As part of the transition toward Page Pool integration, remove the legacy page
> splitting and recycling logic from the ice driver. This mirrors the approach
> taken in commit 920d86f3c552 ("iavf: drop page splitting and recycling").
>
> The previous model attempted to reuse partially consumed pages by splitting
> them and tracking their usage across descriptors. While this was once a
> memory optimization, it introduced significant complexity and overhead in
> the Rx path, including:
> - Manual refcount management and page reuse heuristics;
> - Per-descriptor buffer shuffling, which could involve moving dozens
> of `ice_rx_buf` structures per NAPI cycle;
> - Increased branching and cache pressure in the hotpath.
>
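For anyone skimming the diff below, the reuse heuristic being dropped boils
down to roughly the following (condensed from the removed
ice_rx_buf_adjust_pg_offset()/ice_can_reuse_rx_page()/ice_put_rx_buf() further
down, not a literal quote):

	/* split model: flip between the two halves of an order-0 page and
	 * keep reusing it while the driver is the sole owner of the page
	 */
	rx_buf->page_offset ^= size;			/* PAGE_SIZE < 8192 */
	if (dev_page_is_reusable(rx_buf->page) &&
	    rx_buf->pgcnt - rx_buf->pagecnt_bias <= 1)
		ice_reuse_rx_page(rx_ring, rx_buf);	/* hand back to ring */
	else
		__page_frag_cache_drain(rx_buf->page,
					rx_buf->pagecnt_bias); /* give it up */

so every completed descriptor paid for that bookkeeping.
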
> This change simplifies the Rx logic by always allocating fresh pages and letting
> the networking stack handle their lifecycle. Although this may temporarily
> reduce performance (by up to ~98% in some XDP cases), it greatly improves
> maintainability and paves the way for Page Pool, which will restore and
> exceed previous performance levels.
>
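After the patch, the per-buffer lifecycle is essentially this (condensed from
the new ice_alloc_mapped_page()/ice_put_rx_buf() bodies further down, not a
literal quote):

	/* refill: always start from a fresh order-0 page */
	page = dev_alloc_pages(0);
	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);

	/* completion: unmap and let the skb / networking stack own the page */
	dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
			     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);

i.e. no refcount bias, no offset flipping and no buffer shuffling left in the
hotpath.
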
> The `ice_rx_buf` array is retained for now to minimize diffstat and ease future
> replacement with a shared buffer abstraction.
>
> Co-developed-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> Reviewed-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> Signed-off-by: Michal Kubiak <michal.kubiak@...el.com>
> ---
> drivers/net/ethernet/intel/ice/ice.h | 2 +
> drivers/net/ethernet/intel/ice/ice_base.c | 26 ++--
> drivers/net/ethernet/intel/ice/ice_txrx.c | 136 ++----------------
> drivers/net/ethernet/intel/ice/ice_txrx.h | 8 --
> drivers/net/ethernet/intel/ice/ice_virtchnl.c | 5 +-
> 5 files changed, 25 insertions(+), 152 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ice/ice.h
> b/drivers/net/ethernet/intel/ice/ice.h
> index d67dc2f02acf..bf37c8420828 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -369,6 +369,8 @@ struct ice_vsi {
> spinlock_t arfs_lock; /* protects aRFS hash table and filter state */
> atomic_t *arfs_last_fltr_id;
>
> + u16 max_frame;
> +
> struct ice_aqc_vsi_props info; /* VSI properties */
> struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_base.c
> b/drivers/net/ethernet/intel/ice/ice_base.c
> index db2fa4a6bc67..aa75425d92e6 100644
> --- a/drivers/net/ethernet/intel/ice/ice_base.c
> +++ b/drivers/net/ethernet/intel/ice/ice_base.c
> @@ -391,7 +391,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
> /* Receive Packet Data Buffer Size.
> * The Packet Data Buffer Size is defined in 128 byte units.
> */
> - rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
> + rlan_ctx.dbuf = DIV_ROUND_UP(ICE_RXBUF_3072,
> BIT_ULL(ICE_RLAN_CTX_DBUF_S));
>
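(Sanity check on the fixed value: the field is in 128-byte units, i.e.
BIT_ULL(ICE_RLAN_CTX_DBUF_S) == 128 if I read the define right, so this now
always programs DIV_ROUND_UP(3072, 128) = 24 into the context.)
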
> /* use 32 byte descriptors */
> @@ -432,8 +432,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
> /* Max packet size for this queue - must not be set to a larger value
> * than 5 x DBUF
> */
> - rlan_ctx.rxmax = min_t(u32, ring->max_frame,
> - ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
> + rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
> + ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_3072);
>
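(And the cap above works out to ICE_MAX_CHAINED_RX_BUFS * 3072 = 5 * 3072 =
15360 bytes, assuming ICE_MAX_CHAINED_RX_BUFS is still 5, so jumbo-sized
vsi->max_frame values still fit within a buffer chain.)
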
> /* Rx queue threshold in units of 64 */
> rlan_ctx.lrxqthresh = 1;
> @@ -504,7 +504,7 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
> #if (PAGE_SIZE >= 8192)
> frame_sz = rx_ring->rx_buf_len;
> #else
> - frame_sz = ice_rx_pg_size(rx_ring) / 2;
> + frame_sz = PAGE_SIZE / 2;
> #endif
>
> return frame_sz;
> @@ -520,6 +520,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> {
> struct device *dev = ice_pf_to_dev(ring->vsi->back);
> u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
> + u32 rx_buf_len;
> int err;
>
> if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) {
> @@ -527,7 +528,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
> ring->q_index,
> ring->q_vector->napi.napi_id,
> - ring->rx_buf_len);
> + ICE_RXBUF_3072);
> if (err)
> return err;
> }
> @@ -536,12 +537,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> if (ring->xsk_pool) {
> xdp_rxq_info_unreg(&ring->xdp_rxq);
>
> - ring->rx_buf_len =
> + rx_buf_len =
> xsk_pool_get_rx_frame_size(ring->xsk_pool);
> err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
> ring->q_index,
> ring->q_vector->napi.napi_id,
> - ring->rx_buf_len);
> + rx_buf_len);
> if (err)
> return err;
> err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
> @@ -559,7 +560,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
> ring->q_index,
> ring->q_vector->napi.napi_id,
> - ring->rx_buf_len);
> + ICE_RXBUF_3072);
> if (err)
> return err;
> }
> @@ -631,17 +632,14 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
> static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring)
> {
> if (!vsi->netdev) {
> - ring->max_frame = ICE_MAX_FRAME_LEGACY_RX;
> - ring->rx_buf_len = ICE_RXBUF_1664;
> + vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
> #if (PAGE_SIZE < 8192)
> } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
> (vsi->netdev->mtu <= ETH_DATA_LEN)) {
> - ring->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
> - ring->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
> + vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
> #endif
> } else {
> - ring->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
> - ring->rx_buf_len = ICE_RXBUF_3072;
> + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
> }
> }
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c
> b/drivers/net/ethernet/intel/ice/ice_txrx.c
> index fb1d14bd20d1..b640c131b6bd 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> @@ -412,13 +412,13 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
> */
> dma_sync_single_range_for_cpu(dev, rx_buf->dma,
> rx_buf->page_offset,
> - rx_ring->rx_buf_len,
> + ICE_RXBUF_3072,
> DMA_FROM_DEVICE);
>
> /* free resources associated with mapping */
> - dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
> + dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
> DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
> - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
> + __free_page(rx_buf->page);
>
> rx_buf->page = NULL;
> rx_buf->page_offset = 0;
> @@ -672,10 +672,6 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
> struct page *page = bi->page;
> dma_addr_t dma;
>
> - /* since we are recycling buffers we should seldom need to alloc */
> - if (likely(page))
> - return true;
> -
> /* alloc new page for storage */
> page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
> if (unlikely(!page)) {
> @@ -684,7 +680,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
> }
>
> /* map page for use */
> - dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
> + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
> DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
>
> /* if mapping failed free memory back to system since
> @@ -700,7 +696,6 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
> bi->page = page;
> bi->page_offset = rx_ring->rx_offset;
> page_ref_add(page, USHRT_MAX - 1);
> - bi->pagecnt_bias = USHRT_MAX;
>
> return true;
> }
> @@ -771,7 +766,7 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
> /* sync the buffer for use by the device */
> dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
> bi->page_offset,
> - rx_ring->rx_buf_len,
> + ICE_RXBUF_3072,
> DMA_FROM_DEVICE);
>
> /* Refresh the desc even if buffer_addrs didn't change
> @@ -800,69 +795,6 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
> return !!cleaned_count;
> }
>
> -/**
> - * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
> - * @rx_buf: Rx buffer to adjust
> - * @size: Size of adjustment
> - *
> - * Update the offset within page so that Rx buf will be ready to be reused.
> - * For systems with PAGE_SIZE < 8192 this function will flip the page offset
> - * so the second half of page assigned to Rx buffer will be used, otherwise
> - * the offset is moved by "size" bytes
> - */
> -static void
> -ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
> -{
> -#if (PAGE_SIZE < 8192)
> - /* flip page offset to other buffer */
> - rx_buf->page_offset ^= size;
> -#else
> - /* move offset up to the next cache line */
> - rx_buf->page_offset += size;
> -#endif
> -}
> -
> -/**
> - * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
> - * @rx_buf: buffer containing the page
> - *
> - * If page is reusable, we have a green light for calling ice_reuse_rx_page,
> - * which will assign the current buffer to the buffer that next_to_alloc is
> - * pointing to; otherwise, the DMA mapping needs to be destroyed and
> - * page freed
> - */
> -static bool
> -ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
> -{
> - unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
> - struct page *page = rx_buf->page;
> -
> - /* avoid re-using remote and pfmemalloc pages */
> - if (!dev_page_is_reusable(page))
> - return false;
> -
> - /* if we are only owner of page we can reuse it */
> - if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
> - return false;
> -#if (PAGE_SIZE >= 8192)
> -#define ICE_LAST_OFFSET \
> - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072)
> - if (rx_buf->page_offset > ICE_LAST_OFFSET)
> - return false;
> -#endif /* PAGE_SIZE >= 8192) */
> -
> - /* If we have drained the page fragment pool we need to update
> - * the pagecnt_bias and page count so that we fully restock the
> - * number of references the driver holds.
> - */
> - if (unlikely(pagecnt_bias == 1)) {
> - page_ref_add(page, USHRT_MAX - 1);
> - rx_buf->pagecnt_bias = USHRT_MAX;
> - }
> -
> - return true;
> -}
> -
> /**
> * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
> * @rx_ring: Rx descriptor ring to transact packets on
> @@ -901,35 +833,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> return 0;
> }
>
> -/**
> - * ice_reuse_rx_page - page flip buffer and store it back on the ring
> - * @rx_ring: Rx descriptor ring to store buffers on
> - * @old_buf: donor buffer to have page reused
> - *
> - * Synchronizes page for reuse by the adapter
> - */
> -static void
> -ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
> -{
> - u16 nta = rx_ring->next_to_alloc;
> - struct ice_rx_buf *new_buf;
> -
> - new_buf = &rx_ring->rx_buf[nta];
> -
> - /* update, and store next to alloc */
> - nta++;
> - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
> -
> - /* Transfer page from old buffer to new buffer.
> - * Move each member individually to avoid possible store
> - * forwarding stalls and unnecessary copy of skb.
> - */
> - new_buf->dma = old_buf->dma;
> - new_buf->page = old_buf->page;
> - new_buf->page_offset = old_buf->page_offset;
> - new_buf->pagecnt_bias = old_buf->pagecnt_bias;
> -}
> -
> /**
> * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
> * @rx_ring: Rx descriptor ring to transact packets on
> @@ -955,9 +858,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
> rx_buf->page_offset, size,
> DMA_FROM_DEVICE);
>
> - /* We have pulled a buffer for use, so decrement pagecnt_bias */
> - rx_buf->pagecnt_bias--;
> -
> return rx_buf;
> }
>
> @@ -1053,16 +953,10 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
> if (!rx_buf)
> return;
>
> - if (ice_can_reuse_rx_page(rx_buf)) {
> - /* hand second half of page back to the ring */
> - ice_reuse_rx_page(rx_ring, rx_buf);
> - } else {
> - /* we are not reusing the buffer so unmap it */
> - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
> - ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
> - ICE_RX_DMA_ATTR);
> - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
> - }
> + /* we are not reusing the buffer so unmap it */
> + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
> + PAGE_SIZE, DMA_FROM_DEVICE,
> + ICE_RX_DMA_ATTR);
>
> /* clear contents of buffer_info */
> rx_buf->page = NULL;
> @@ -1085,27 +979,15 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
> static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> u32 ntc, u32 verdict)
> {
> - u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
> u32 idx = rx_ring->first_desc;
> u32 cnt = rx_ring->count;
> struct ice_rx_buf *buf;
> - int i = 0;
>
> while (idx != ntc) {
> buf = &rx_ring->rx_buf[idx];
> if (++idx == cnt)
> idx = 0;
>
> - /* An XDP program could release fragments from the end of the
> - * buffer. For these, we need to keep the pagecnt_bias as-is.
> - * To do this, only adjust pagecnt_bias for fragments up to
> - * the total remaining after the XDP program has run.
> - */
> - if (verdict != ICE_XDP_CONSUMED)
> - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
> - else if (i++ <= nr_frags)
> - buf->pagecnt_bias++;
> -
> ice_put_rx_buf(rx_ring, buf);
> }
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h
> b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index 2fd8e78178a2..7c696f7c598b 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -202,7 +202,6 @@ struct ice_rx_buf {
> struct page *page;
> unsigned int page_offset;
> unsigned int pgcnt;
> - unsigned int pagecnt_bias;
> };
>
> struct ice_q_stats {
> @@ -358,7 +357,6 @@ struct ice_rx_ring {
> struct ice_tx_ring *xdp_ring;
> struct ice_rx_ring *next; /* pointer to next ring in q_vector */
> struct xsk_buff_pool *xsk_pool;
> - u16 max_frame;
> u16 rx_buf_len;
> dma_addr_t dma; /* physical address of ring */
> u8 dcb_tc; /* Traffic class of ring */
> @@ -479,15 +477,9 @@ struct ice_coalesce_stored {
>
> static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
> {
> -#if (PAGE_SIZE < 8192)
> - if (ring->rx_buf_len > (PAGE_SIZE / 2))
> - return 1;
> -#endif
> return 0;
> }
>
> -#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
> -
> union ice_32b_rx_flex_desc;
>
> void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs);
> diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
> b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
> index 257967273079..0090099917ea 100644
> --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
> +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
> @@ -2086,18 +2086,17 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
> (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
> qpi->rxq.databuffer_size < 1024))
> goto error_param;
> - ring->rx_buf_len = qpi->rxq.databuffer_size;
> if (qpi->rxq.max_pkt_size > max_frame_size ||
> qpi->rxq.max_pkt_size < 64)
> goto error_param;
>
> - ring->max_frame = qpi->rxq.max_pkt_size;
> + vsi->max_frame = qpi->rxq.max_pkt_size;
> /* add space for the port VLAN since the VF driver is
> * not expected to account for it in the MTU
> * calculation
> */
> if (ice_vf_is_port_vlan_ena(vf))
> - ring->max_frame += VLAN_HLEN;
> + vsi->max_frame += VLAN_HLEN;
>
> if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
> dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
> --
> 2.45.2
Tested-by: Priya Singh <priyax.singh@...el.com>