lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d5cca789-c25e-86ad-2579-0dba00e079b3@linux.dev>
Date:   Sat, 5 Aug 2023 18:02:32 +0800
From:   Zhu Yanjun <yanjun.zhu@...ux.dev>
To:     Haiyang Zhang <haiyangz@...rosoft.com>,
        linux-hyperv@...r.kernel.org, netdev@...r.kernel.org
Cc:     decui@...rosoft.com, kys@...rosoft.com, paulros@...rosoft.com,
        olaf@...fle.de, vkuznets@...hat.com, davem@...emloft.net,
        wei.liu@...nel.org, edumazet@...gle.com, kuba@...nel.org,
        pabeni@...hat.com, leon@...nel.org, longli@...rosoft.com,
        ssengar@...ux.microsoft.com, linux-rdma@...r.kernel.org,
        daniel@...earbox.net, john.fastabend@...il.com,
        bpf@...r.kernel.org, ast@...nel.org, sharmaajay@...rosoft.com,
        hawk@...nel.org, tglx@...utronix.de,
        shradhagupta@...ux.microsoft.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH V6,net-next] net: mana: Add page pool for RX buffers

在 2023/8/5 4:33, Haiyang Zhang 写道:
> Add a page pool for RX buffers to speed up buffer cycling and reduce CPU
> usage.
> 
> The standard page pool API is used.
> 
> In an iperf test with 128 threads, this patch improved throughput
> by 12-15% and decreased the usage of the IRQ-associated CPU from 99-100% to
> 10-50%.

https://www.spinics.net/lists/netdev/msg584734.html

The throughput and CPU utilization improvements are very good. I posted a
similar patch series, which can be found at the link above.
David Miller had the following comments:
"
The system is supposed to hold onto enough atomic memory to absorb all
reasonable situations like this.

If anything a solution to this problem belongs generically somewhere,
not in a driver.  And furthermore looping over an allocation attempt
with a delay is strongly discouraged.
"

I hope your commit can be merged upstream (fingers crossed).

Zhu Yanjun

> 
> Signed-off-by: Haiyang Zhang <haiyangz@...rosoft.com>
> Reviewed-by: Jesse Brandeburg <jesse.brandeburg@...el.com>
> ---
> V6:
> Added perf info as suggested by Jesper Dangaard Brouer
> V5:
> In err path, set page_pool_put_full_page(..., false) as suggested by
> Jakub Kicinski
> V4:
> Add nid setting, remove page_pool_nid_changed(), as suggested by
> Jesper Dangaard Brouer
> V3:
> Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
> V2:
> Use the standard page pool API as suggested by Jesper Dangaard Brouer
> ---
>   drivers/net/ethernet/microsoft/mana/mana_en.c | 90 +++++++++++++++----
>   include/net/mana/mana.h                       |  3 +
>   2 files changed, 77 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index ac2acc9aca9d..1a4ac1c8736e 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1414,8 +1414,8 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
>   	return skb;
>   }
>   
> -static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
> -			struct mana_rxq *rxq)
> +static void mana_rx_skb(void *buf_va, bool from_pool,
> +			struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
>   {
>   	struct mana_stats_rx *rx_stats = &rxq->stats;
>   	struct net_device *ndev = rxq->ndev;
> @@ -1448,6 +1448,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   	if (!skb)
>   		goto drop;
>   
> +	if (from_pool)
> +		skb_mark_for_recycle(skb);
> +
>   	skb->dev = napi->dev;
>   
>   	skb->protocol = eth_type_trans(skb, ndev);
> @@ -1498,9 +1501,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   	u64_stats_update_end(&rx_stats->syncp);
>   
>   drop:
> -	WARN_ON_ONCE(rxq->xdp_save_va);
> -	/* Save for reuse */
> -	rxq->xdp_save_va = buf_va;
> +	if (from_pool) {
> +		page_pool_recycle_direct(rxq->page_pool,
> +					 virt_to_head_page(buf_va));
> +	} else {
> +		WARN_ON_ONCE(rxq->xdp_save_va);
> +		/* Save for reuse */
> +		rxq->xdp_save_va = buf_va;
> +	}
>   
>   	++ndev->stats.rx_dropped;
>   
> @@ -1508,11 +1516,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   }
>   
>   static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
> -			     dma_addr_t *da, bool is_napi)
> +			     dma_addr_t *da, bool *from_pool, bool is_napi)
>   {
>   	struct page *page;
>   	void *va;
>   
> +	*from_pool = false;
> +
>   	/* Reuse XDP dropped page if available */
>   	if (rxq->xdp_save_va) {
>   		va = rxq->xdp_save_va;
> @@ -1533,17 +1543,22 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
>   			return NULL;
>   		}
>   	} else {
> -		page = dev_alloc_page();
> +		page = page_pool_dev_alloc_pages(rxq->page_pool);
>   		if (!page)
>   			return NULL;
>   
> +		*from_pool = true;
>   		va = page_to_virt(page);
>   	}
>   
>   	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
>   			     DMA_FROM_DEVICE);
>   	if (dma_mapping_error(dev, *da)) {
> -		put_page(virt_to_head_page(va));
> +		if (*from_pool)
> +			page_pool_put_full_page(rxq->page_pool, page, false);
> +		else
> +			put_page(virt_to_head_page(va));
> +
>   		return NULL;
>   	}
>   
> @@ -1552,21 +1567,25 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
>   
>   /* Allocate frag for rx buffer, and save the old buf */
>   static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
> -			       struct mana_recv_buf_oob *rxoob, void **old_buf)
> +			       struct mana_recv_buf_oob *rxoob, void **old_buf,
> +			       bool *old_fp)
>   {
> +	bool from_pool;
>   	dma_addr_t da;
>   	void *va;
>   
> -	va = mana_get_rxfrag(rxq, dev, &da, true);
> +	va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true);
>   	if (!va)
>   		return;
>   
>   	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
>   			 DMA_FROM_DEVICE);
>   	*old_buf = rxoob->buf_va;
> +	*old_fp = rxoob->from_pool;
>   
>   	rxoob->buf_va = va;
>   	rxoob->sgl[0].address = da;
> +	rxoob->from_pool = from_pool;
>   }
>   
>   static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
> @@ -1580,6 +1599,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
>   	struct device *dev = gc->dev;
>   	void *old_buf = NULL;
>   	u32 curr, pktlen;
> +	bool old_fp;
>   
>   	apc = netdev_priv(ndev);
>   
> @@ -1622,12 +1642,12 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
>   	rxbuf_oob = &rxq->rx_oobs[curr];
>   	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
>   
> -	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf);
> +	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);
>   
>   	/* Unsuccessful refill will have old_buf == NULL.
>   	 * In this case, mana_rx_skb() will drop the packet.
>   	 */
> -	mana_rx_skb(old_buf, oob, rxq);
> +	mana_rx_skb(old_buf, old_fp, oob, rxq);
>   
>   drop:
>   	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
> @@ -1887,6 +1907,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>   	struct mana_recv_buf_oob *rx_oob;
>   	struct device *dev = gc->dev;
>   	struct napi_struct *napi;
> +	struct page *page;
>   	int i;
>   
>   	if (!rxq)
> @@ -1919,10 +1940,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>   		dma_unmap_single(dev, rx_oob->sgl[0].address,
>   				 rx_oob->sgl[0].size, DMA_FROM_DEVICE);
>   
> -		put_page(virt_to_head_page(rx_oob->buf_va));
> +		page = virt_to_head_page(rx_oob->buf_va);
> +
> +		if (rx_oob->from_pool)
> +			page_pool_put_full_page(rxq->page_pool, page, false);
> +		else
> +			put_page(page);
> +
>   		rx_oob->buf_va = NULL;
>   	}
>   
> +	page_pool_destroy(rxq->page_pool);
> +
>   	if (rxq->gdma_rq)
>   		mana_gd_destroy_queue(gc, rxq->gdma_rq);
>   
> @@ -1933,18 +1962,20 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
>   			    struct mana_rxq *rxq, struct device *dev)
>   {
>   	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
> +	bool from_pool = false;
>   	dma_addr_t da;
>   	void *va;
>   
>   	if (mpc->rxbufs_pre)
>   		va = mana_get_rxbuf_pre(rxq, &da);
>   	else
> -		va = mana_get_rxfrag(rxq, dev, &da, false);
> +		va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false);
>   
>   	if (!va)
>   		return -ENOMEM;
>   
>   	rx_oob->buf_va = va;
> +	rx_oob->from_pool = from_pool;
>   
>   	rx_oob->sgl[0].address = da;
>   	rx_oob->sgl[0].size = rxq->datasize;
> @@ -2014,6 +2045,26 @@ static int mana_push_wqe(struct mana_rxq *rxq)
>   	return 0;
>   }
>   
> +static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
> +{
> +	struct page_pool_params pprm = {};
> +	int ret;
> +
> +	pprm.pool_size = RX_BUFFERS_PER_QUEUE;
> +	pprm.nid = gc->numa_node;
> +	pprm.napi = &rxq->rx_cq.napi;
> +
> +	rxq->page_pool = page_pool_create(&pprm);
> +
> +	if (IS_ERR(rxq->page_pool)) {
> +		ret = PTR_ERR(rxq->page_pool);
> +		rxq->page_pool = NULL;
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   					u32 rxq_idx, struct mana_eq *eq,
>   					struct net_device *ndev)
> @@ -2043,6 +2094,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
>   			   &rxq->headroom);
>   
> +	/* Create page pool for RX queue */
> +	err = mana_create_page_pool(rxq, gc);
> +	if (err) {
> +		netdev_err(ndev, "Create page pool err:%d\n", err);
> +		goto out;
> +	}
> +
>   	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
>   	if (err)
>   		goto out;
> @@ -2114,8 +2172,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   
>   	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
>   				 cq->napi.napi_id));
> -	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
> -					   MEM_TYPE_PAGE_SHARED, NULL));
> +	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
> +					   rxq->page_pool));
>   
>   	napi_enable(&cq->napi);
>   
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 024ad8ddb27e..b12859511839 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
>   	struct gdma_wqe_request wqe_req;
>   
>   	void *buf_va;
> +	bool from_pool; /* allocated from a page pool */
>   
>   	/* SGL of the buffer going to be sent has part of the work request. */
>   	u32 num_sge;
> @@ -330,6 +331,8 @@ struct mana_rxq {
>   	bool xdp_flush;
>   	int xdp_rc; /* XDP redirect return code */
>   
> +	struct page_pool *page_pool;
> +
>   	/* MUST BE THE LAST MEMBER:
>   	 * Each receive buffer has an associated mana_recv_buf_oob.
>   	 */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ