Message-ID: <20250331115045.032d2eb7@kernel.org>
Date: Mon, 31 Mar 2025 11:50:45 -0700
From: Jakub Kicinski <kuba@...nel.org>
To: Taehee Yoo <ap420073@...il.com>
Cc: davem@...emloft.net, pabeni@...hat.com, edumazet@...gle.com,
andrew+netdev@...n.ch, horms@...nel.org, michael.chan@...adcom.com,
pavan.chebbi@...adcom.com, ilias.apalodimas@...aro.org, dw@...idwei.uk,
netdev@...r.kernel.org, kuniyu@...zon.com, sdf@...ichev.me,
aleksander.lobakin@...el.com
Subject: Re: [RFC net-next 2/2] eth: bnxt: add support rx side device memory
TCP
On Mon, 31 Mar 2025 11:47:29 +0000 Taehee Yoo wrote:
> Currently, the bnxt_en driver satisfies the requirement of Device
> memory TCP, namely header-data split (HDS).
> This patch implements rx-side Device memory TCP for the bnxt_en driver.
> It only requires converting the page API to the netmem API:
> `struct page` for the rx side is changed to `netmem_ref netmem` and the
> corresponding functions are changed to their netmem API variants.
>
> It also passes the PP_FLAG_ALLOW_UNREADABLE_NETMEM flag in the
> page_pool parameters.
> Unreadable netmem is activated only when a user requests devmem TCP.
>
> When unreadable netmem is activated, received data is unreadable; when
> it is disabled, received data is readable.
> Drivers don't need to handle both cases explicitly because the netmem
> core API handles it properly, so using the proper netmem API is enough
> for drivers.
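
[For reference, a minimal sketch of why the driver can stay agnostic --
my paraphrase of the generic netmem accessor behaviour, not code from
this patch; the _sketch suffix is made up:

static inline void *netmem_address_sketch(netmem_ref netmem)
{
	/* Unreadable devmem (net_iov) has no kernel mapping at all. */
	if (netmem_is_net_iov(netmem))
		return NULL;
	/* Plain host memory: fall back to the usual page VA. */
	return page_address(netmem_to_page(netmem));
}

A driver that only touches data through the netmem helpers therefore
works for both readable and unreadable buffers.]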
> @@ -1352,15 +1367,15 @@ static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi,
> if (!skb)
> return NULL;
>
> - page_pool_dma_sync_for_cpu(rxr->head_pool, page,
> - offset + bp->rx_dma_offset,
> - bp->rx_copybreak);
> + page_pool_dma_sync_netmem_for_cpu(rxr->head_pool, netmem,
> + offset + bp->rx_dma_offset,
> + bp->rx_copybreak);
>
> memcpy(skb->data - NET_IP_ALIGN,
> - bnxt_data_ptr(bp, page, offset) - NET_IP_ALIGN,
> + bnxt_data_ptr(bp, netmem, offset) - NET_IP_ALIGN,
> len + NET_IP_ALIGN);
>
> - page_pool_dma_sync_for_device(rxr->head_pool, page_to_netmem(page),
> + page_pool_dma_sync_for_device(rxr->head_pool, netmem,
> bp->rx_dma_offset, bp->rx_copybreak);
> skb_put(skb, len);
>
Do we check if rx copybreak is enabled before allowing ZC to be enabled?
We can't copybreak with unreadable memory.
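
Something along these lines, perhaps (hypothetical helper name, just to
illustrate the check being asked about):

static int bnxt_devmem_compat_check(struct bnxt *bp)
{
	/* bnxt_copy_data() memcpy()s from a kernel VA, which unreadable
	 * net_iov memory doesn't have, so the two can't be mixed.
	 */
	if (bp->rx_copybreak)
		return -EINVAL;	/* require rx-copybreak 0 before binding */
	return 0;
}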
> @@ -15912,7 +15941,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
> goto err_reset;
> }
>
> - napi_enable(&bnapi->napi);
> + napi_enable_locked(&bnapi->napi);
> bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
>
> for (i = 0; i < bp->nr_vnics; i++) {
> @@ -15964,7 +15993,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
> bnxt_hwrm_rx_ring_free(bp, rxr, false);
> bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
> page_pool_disable_direct_recycling(rxr->page_pool);
> - if (bnxt_separate_head_pool())
> + if (bnxt_separate_head_pool(rxr))
> page_pool_disable_direct_recycling(rxr->head_pool);
>
> if (bp->flags & BNXT_FLAG_SHARED_RINGS)
> @@ -15974,7 +16003,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
> * completion is handled in NAPI to guarantee no more DMA on that ring
> * after seeing the completion.
> */
> - napi_disable(&bnapi->napi);
> + napi_disable_locked(&bnapi->napi);
This is a fix, right? The IRQ code already calls the queue reset without
rtnl_lock. Let's split it out and submit it for net.
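
Sketch only (hypothetical helper, assuming the queue mgmt callbacks run
with the netdev instance lock already held, which is why the plain
napi_enable()/napi_disable() can't be used there):

static void bnxt_queue_napi_set(struct napi_struct *napi, bool up)
{
	/* Caller must hold the netdev instance lock. */
	if (up)
		napi_enable_locked(napi);
	else
		napi_disable_locked(napi);
}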
> @@ -2863,15 +2865,15 @@ static inline bool bnxt_sriov_cfg(struct bnxt *bp)
> #endif
> }
>
> -static inline u8 *bnxt_data_ptr(struct bnxt *bp, struct page *page,
> +static inline u8 *bnxt_data_ptr(struct bnxt *bp, netmem_ref netmem,
> unsigned int offset)
> {
> - return page_address(page) + offset + bp->rx_offset;
> + return netmem_address(netmem) + offset + bp->rx_offset;
> }
>
> -static inline u8 *bnxt_data(struct page *page, unsigned int offset)
> +static inline u8 *bnxt_data(netmem_ref netmem, unsigned int offset)
> {
> - return page_address(page) + offset;
> + return netmem_address(netmem) + offset;
> }
This is not great; it seems like the unification of the normal vs agg bd
structs backfires here. Unreadable netmem can only be populated in agg
bds, right? So why don't we keep the structs separate and avoid the need
to convert from netmem back to a VA?
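
Roughly (the field layout below is only illustrative, not lifted from
the driver):

struct bnxt_sw_rx_bd {		/* head/normal ring: always host memory */
	struct page	*page;	/* kernel VA via page_address() */
	dma_addr_t	mapping;
};

struct bnxt_sw_rx_agg_bd {	/* agg ring: may hold unreadable devmem */
	netmem_ref	netmem;	/* possibly a net_iov, never dereferenced */
	dma_addr_t	mapping;
	u16		agg_id;
};

That way only the agg path deals in netmem_ref and the head path never
needs netmem_address().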
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
> index 9592d04e0661..85b6df6a9e7f 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
> @@ -18,7 +18,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
> struct xdp_buff *xdp);
> void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget);
> bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
> - struct xdp_buff *xdp, struct page *page, u8 **data_ptr,
> + struct xdp_buff *xdp, netmem_ref netmem, u8 **data_ptr,
> unsigned int *len, u8 *event);
> int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
> int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
> @@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
> bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr);
>
> void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
> - u16 cons, struct page *page, unsigned int len,
> + u16 cons, netmem_ref netmem, unsigned int len,
> struct xdp_buff *xdp);
We also shouldn't pass netmem to the XDP init; it's conceptually strange.
If we reach XDP, it has to be a non-net_iov page.
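
I.e. convert once at the call site and keep the XDP helpers page-based
(sketch only, wrapper name made up, assuming the pre-patch page-based
bnxt_xdp_buff_init() signature):

static void bnxt_xdp_init_from_netmem(struct bnxt *bp,
				      struct bnxt_rx_ring_info *rxr, u16 cons,
				      netmem_ref netmem, unsigned int len,
				      struct xdp_buff *xdp)
{
	/* XDP implies host memory, so this can never be a net_iov. */
	struct page *page = netmem_to_page(netmem);

	bnxt_xdp_buff_init(bp, rxr, cons, page, len, xdp);
}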