Message-ID: <ZEGd5QHTInP8WRlZ@boxer>
Date:   Thu, 20 Apr 2023 22:17:41 +0200
From:   Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To:     Gerhard Engleder <gerhard@...leder-embedded.com>
CC:     <netdev@...r.kernel.org>, <bpf@...r.kernel.org>,
        <davem@...emloft.net>, <kuba@...nel.org>, <edumazet@...gle.com>,
        <pabeni@...hat.com>, <bjorn@...nel.org>,
        <magnus.karlsson@...el.com>, <jonathan.lemon@...il.com>
Subject: Re: [PATCH net-next v3 6/6] tsnep: Add XDP socket zero-copy TX
 support

On Tue, Apr 18, 2023 at 09:04:59PM +0200, Gerhard Engleder wrote:
> Send and complete XSK pool frames within TX NAPI context. NAPI context
> is triggered by ndo_xsk_wakeup.
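
Side note for anyone following along: the wakeup callback itself is not
in the quoted hunks. The usual ndo_xsk_wakeup pattern in ZC drivers just
kicks NAPI so that the TX NAPI context runs; roughly like this
(hypothetical sketch with guessed names - the actual tsnep callback may
differ):

static int tsnep_netdev_xsk_wakeup(struct net_device *netdev,
				   u32 queue_id, u32 flags)
{
	struct tsnep_adapter *adapter = netdev_priv(netdev);
	struct tsnep_queue *queue;

	if (queue_id >= adapter->num_queues)
		return -EINVAL;
	queue = &adapter->queue[queue_id];

	if (!queue->tx || !queue->tx->xsk_pool)
		return -EINVAL;

	/* if NAPI is already running it will pick up the TX work,
	 * otherwise schedule it
	 */
	if (!napi_if_scheduled_mark_missed(&queue->napi))
		napi_schedule(&queue->napi);

	return 0;
}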
> 
> Test results with A53 1.2GHz:
> 
> xdpsock txonly copy mode:
>                    pps            pkts           1.00
> tx                 284,409        11,398,144
> Two CPUs with 100% and 10% utilization.
> 
> xdpsock txonly zero-copy mode:
>                    pps            pkts           1.00
> tx                 511,929        5,890,368
> Two CPUs with 100% and 1% utilization.

Hmm, I think the l2fwd ZC numbers should be included here, not in the
previous patch?

> 
> Packet rate increases and CPU utilization is reduced.
> 
> Signed-off-by: Gerhard Engleder <gerhard@...leder-embedded.com>
> ---
>  drivers/net/ethernet/engleder/tsnep.h      |   2 +
>  drivers/net/ethernet/engleder/tsnep_main.c | 127 +++++++++++++++++++--
>  2 files changed, 119 insertions(+), 10 deletions(-)
> 

(...)

> +static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
> +{
> +	struct tsnep_tx_entry *entry;
> +	dma_addr_t dma;
> +
> +	entry = &tx->entry[tx->write];
> +	entry->zc = true;
> +
> +	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
> +	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
> +
> +	entry->type = TSNEP_TX_TYPE_XSK;
> +	entry->len = xdpd->len;
> +
> +	entry->desc->tx = __cpu_to_le64(dma);
> +
> +	return xdpd->len;
> +}
> +
> +static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
> +					 struct tsnep_tx *tx)
> +{
> +	int length;
> +
> +	length = tsnep_xdp_tx_map_zc(xdpd, tx);
> +
> +	tsnep_tx_activate(tx, tx->write, length, true);
> +	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
> +}
> +
> +static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
> +{
> +	int desc_available = tsnep_tx_desc_available(tx);
> +	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
> +	int batch, i;
> +
> +	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
> +	 * will be available for normal TX path and queue is stopped there if
> +	 * necessary
> +	 */
> +	if (desc_available <= (MAX_SKB_FRAGS + 1))
> +		return;
> +	desc_available -= MAX_SKB_FRAGS + 1;
> +
> +	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
> +	for (i = 0; i < batch; i++)
> +		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
> +
> +	if (batch) {
> +		/* descriptor properties shall be valid before hardware is
> +		 * notified
> +		 */
> +		dma_wmb();
> +
> +		tsnep_xdp_xmit_flush(tx);
> +	}
> +}
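
FWIW the reservation arithmetic above checks out: with N descriptors
free and MAX_SKB_FRAGS == 17 (the common default), at most N - 18 are
offered to xsk_tx_peek_release_desc_batch(), so the normal TX path is
always left with room for one worst-case skb while the queue is being
stopped.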
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	struct tsnep_tx_entry *entry;
>  	struct netdev_queue *nq;
> +	int xsk_frames = 0;
>  	int budget = 128;
>  	int length;
>  	int count;
> @@ -676,7 +771,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
>  		    skb_shinfo(entry->skb)->nr_frags > 0)
>  			count += skb_shinfo(entry->skb)->nr_frags;
> -		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
> +		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
>  			 xdp_frame_has_frags(entry->xdpf))
>  			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
>  
> @@ -705,9 +800,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  
>  		if (entry->type & TSNEP_TX_TYPE_SKB)
>  			napi_consume_skb(entry->skb, napi_budget);
> -		else
> +		else if (entry->type & TSNEP_TX_TYPE_XDP)
>  			xdp_return_frame_rx_napi(entry->xdpf);
> -		/* xdpf is union with skb */
> +		else
> +			xsk_frames++;
> +		/* xdpf and zc are union with skb */
>  		entry->skb = NULL;
>  
>  		tx->read = (tx->read + count) & TSNEP_RING_MASK;
> @@ -718,6 +815,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		budget--;
>  	} while (likely(budget));
>  
> +	if (tx->xsk_pool) {
> +		if (xsk_frames)
> +			xsk_tx_completed(tx->xsk_pool, xsk_frames);
> +		if (xsk_uses_need_wakeup(tx->xsk_pool))
> +			xsk_set_tx_need_wakeup(tx->xsk_pool);
> +		tsnep_xdp_xmit_zc(tx);

It would be good to signal to NAPI whether we are done with the work or
need to be rescheduled (for the case where you didn't manage to consume
all of the descs from the XSK Tx ring).
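
Something along these lines maybe (completely untested sketch, reusing
your names; how it gets folded into the napi poll return value is up to
you):

static bool tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
{
	int desc_available = tsnep_tx_desc_available(tx);
	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
	int batch, i;

	if (desc_available <= (MAX_SKB_FRAGS + 1))
		return false;
	desc_available -= MAX_SKB_FRAGS + 1;

	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
	for (i = 0; i < batch; i++)
		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);

	if (batch) {
		dma_wmb();
		tsnep_xdp_xmit_flush(tx);
	}

	/* if the whole budget was used, more descs may still sit in the
	 * XSK Tx ring - report "not done" so that the caller keeps NAPI
	 * scheduled
	 */
	return batch < desc_available;
}

tsnep_tx_poll() could then AND that into its own return value so the
napi poll routine doesn't complete NAPI prematurely.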

> +	}
> +
>  	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
>  	    netif_tx_queue_stopped(nq)) {
>  		netif_tx_wake_queue(nq);
> @@ -765,12 +870,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx)
>  
>  static void tsnep_tx_close(struct tsnep_tx *tx)
>  {
> -	u32 val;
> -
> -	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
> -			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
> -			   1000000);
> -
>  	tsnep_tx_ring_cleanup(tx);
>  }
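
I assume the TSNEP_CONTROL_TX_ENABLE poll removed here moved into the
new tsnep_tx_disable() used below, so the wait for TX to drain now
happens while NAPI is still active - just confirming that's the intent.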
>  
> @@ -1786,12 +1885,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue)
>  	napi_enable(&queue->napi);
>  	tsnep_enable_irq(queue->adapter, queue->irq_mask);
>  
> +	if (queue->tx)
> +		tsnep_tx_enable(queue->tx);
> +
>  	if (queue->rx)
>  		tsnep_rx_enable(queue->rx);
>  }
>  
>  static void tsnep_queue_disable(struct tsnep_queue *queue)
>  {
> +	if (queue->tx)
> +		tsnep_tx_disable(queue->tx, &queue->napi);
> +
>  	napi_disable(&queue->napi);
>  	tsnep_disable_irq(queue->adapter, queue->irq_mask);
>  
> @@ -1908,6 +2013,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
>  	if (running)
>  		tsnep_queue_disable(queue);
>  
> +	queue->tx->xsk_pool = pool;
>  	queue->rx->xsk_pool = pool;
>  
>  	if (running) {
> @@ -1928,6 +2034,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue)
>  	tsnep_rx_free_zc(queue->rx);
>  
>  	queue->rx->xsk_pool = NULL;
> +	queue->tx->xsk_pool = NULL;
>  
>  	if (running) {
>  		tsnep_rx_reopen(queue->rx);
> -- 
> 2.30.2
> 
