lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 10 Jan 2023 09:40:44 -0800
From:   Alexander H Duyck <alexander.duyck@...il.com>
To:     Gerhard Engleder <gerhard@...leder-embedded.com>,
        netdev@...r.kernel.org
Cc:     davem@...emloft.net, kuba@...nel.org, edumazet@...gle.com,
        pabeni@...hat.com
Subject: Re: [PATCH net-next v4 09/10] tsnep: Add XDP RX support

On Mon, 2023-01-09 at 20:15 +0100, Gerhard Engleder wrote:
> If BPF program is set up, then run BPF program for every received frame
> and execute the selected action.
> 
> Signed-off-by: Gerhard Engleder <gerhard@...leder-embedded.com>
> ---
>  drivers/net/ethernet/engleder/tsnep_main.c | 122 ++++++++++++++++++++-
>  1 file changed, 120 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index 451ad1849b9d..002c879639db 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -27,6 +27,7 @@
>  #include <linux/phy.h>
>  #include <linux/iopoll.h>
>  #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  
>  #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
>  #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> @@ -44,6 +45,9 @@
>  #define TSNEP_COALESCE_USECS_MAX     ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
>  				      ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>  
> +#define TSNEP_XDP_TX		BIT(0)
> +#define TSNEP_XDP_REDIRECT	BIT(1)
> +
>  enum {
>  	__TSNEP_DOWN,
>  };
> @@ -625,6 +629,28 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>  	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>  }
>  
> +static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> +				struct xdp_buff *xdp,
> +				struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +	bool xmit;
> +
> +	if (unlikely(!xdpf))
> +		return false;
> +
> +	__netif_tx_lock(tx_nq, smp_processor_id());
> +
> +	/* Avoid transmit queue timeout since we share it with the slow path */
> +	txq_trans_cond_update(tx_nq);
> +
> +	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
> +

Again, the txq_trans_cond_update() call should come after the xmit, and
only if it is not indicating it completed the transmit.

> +	__netif_tx_unlock(tx_nq);
> +
> +	return xmit;
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	struct tsnep_tx_entry *entry;
> @@ -983,6 +1009,62 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>  	return i;
>  }
>  
> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
> +			       struct xdp_buff *xdp, int *status,
> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	unsigned int length;
> +	unsigned int sync;
> +	u32 act;
> +
> +	length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
> +
> +	act = bpf_prog_run_xdp(prog, xdp);
> +
> +	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> +	sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
> +	sync = max(sync, length);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		return false;
> +	case XDP_TX:
> +		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
> +			goto out_failure;
> +		*status |= TSNEP_XDP_TX;
> +		return true;
> +	case XDP_REDIRECT:
> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_REDIRECT;
> +		return true;
> +	default:
> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +out_failure:
> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_DROP:
> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
> +				   sync, true);
> +		return true;
> +	}
> +}
> +
> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	if (status & TSNEP_XDP_TX) {
> +		__netif_tx_lock(tx_nq, smp_processor_id());
> +		tsnep_xdp_xmit_flush(tx);
> +		__netif_tx_unlock(tx_nq);
> +	}
> +
> +	if (status & TSNEP_XDP_REDIRECT)
> +		xdp_do_flush();
> +}
> +
>  static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>  				       int length)
>  {
> @@ -1018,15 +1100,29 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  			 int budget)
>  {
>  	struct device *dmadev = rx->adapter->dmadev;
> -	int desc_available;
> -	int done = 0;
>  	enum dma_data_direction dma_dir;
>  	struct tsnep_rx_entry *entry;
> +	struct netdev_queue *tx_nq;
> +	struct bpf_prog *prog;
> +	struct xdp_buff xdp;
>  	struct sk_buff *skb;
> +	struct tsnep_tx *tx;
> +	int desc_available;
> +	int xdp_status = 0;
> +	int done = 0;
>  	int length;
>  
>  	desc_available = tsnep_rx_desc_available(rx);
>  	dma_dir = page_pool_get_dma_dir(rx->page_pool);
> +	prog = READ_ONCE(rx->adapter->xdp_prog);
> +	if (prog) {
> +		int queue = smp_processor_id() % rx->adapter->num_tx_queues;
> +

As I mentioned before, take a look at how this was addressed in
skb_tx_hash(). The modulus division is really expensive.

Also, does this make sense? I am assuming you have a 1:1 Tx-to-Rx
mapping for your queues, don't you? If so, it might make more sense to
use the Tx queue that you clean in this queue pair.

> +		tx_nq = netdev_get_tx_queue(rx->adapter->netdev, queue);
> +		tx = &rx->adapter->tx[queue];
> +
> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
> +	}
>  
>  	while (likely(done < budget) && (rx->read != rx->write)) {
>  		entry = &rx->entry[rx->read];
> @@ -1076,6 +1172,25 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>  		desc_available++;
>  
> +		if (prog) {
> +			bool consume;
> +
> +			xdp_prepare_buff(&xdp, page_address(entry->page),
> +					 XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE,
> +					 length, false);
> +
> +			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
> +						     &xdp_status, tx_nq, tx);
> +			if (consume) {
> +				rx->packets++;
> +				rx->bytes += length;
> +
> +				entry->page = NULL;
> +
> +				continue;
> +			}
> +		}
> +
>  		skb = tsnep_build_skb(rx, entry->page, length);
>  		if (skb) {
>  			page_pool_release_page(rx->page_pool, entry->page);
> @@ -1094,6 +1209,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		entry->page = NULL;
>  	}
>  
> +	if (xdp_status)
> +		tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
> +
>  	if (desc_available)
>  		tsnep_rx_refill(rx, desc_available, false);
>  

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ