lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <beac05a2-ad5a-ddb0-4f59-8884147635af@gmail.com>
Date:   Tue, 14 Sep 2021 18:15:28 +0200
From:   Heiner Kallweit <hkallweit1@...il.com>
To:     Guilin Tang <tangguilin@...ontech.com>, nic_swsd@...ltek.com,
        davem@...emloft.net, kuba@...nel.org
Cc:     ast@...nel.org, linux-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH 2/2] r8169: ADD XDP support for redirect action

On 14.09.2021 11:31, Guilin Tang wrote:
> Implement XDP_REDIRECT based on the r8169 XDP implementation.
> Can use sample/bpf/xdpsock test.
> 
> Signed-off-by: Guilin Tang <tangguilin@...ontech.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 78 +++++++++++++++++++----
>  1 file changed, 65 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 69bc3c68e73d..b56899530ed5 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -69,6 +69,8 @@
>  
>  #define R8169_REGS_SIZE		256
>  #define R8169_RX_BUF_SIZE	(SZ_16K - 1)
> +/*Reserve space for XDP frame*/
> +#define R8169_RX_XDP_BUF_SIZE	(R8169_RX_BUF_SIZE - XDP_PACKET_HEADROOM)
>  #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
>  #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
>  #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
> @@ -645,6 +647,7 @@ struct rtl8169_private {
>  	u32 ocp_base;
>  	/*xdp bpf*/
>  	struct bpf_prog *rtl_xdp;
> +	struct xdp_rxq_info rtl_rxq;
>  };
>  
>  typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
> @@ -2516,7 +2519,7 @@ static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
>  static void rtl_set_rx_max_size(struct rtl8169_private *tp)
>  {
>  	/* Low hurts. Let's disable the filtering. */
> -	RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
> +	RTL_W16(tp, RxMaxSize, R8169_RX_XDP_BUF_SIZE + 1);

You shouldn't have to change this value. Max mtu is 9k and this
16k buffer is used due to hw bug on an older chip versions.

>  }
>  
>  static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
> @@ -3866,7 +3869,7 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc)
>  	desc->opts2 = 0;
>  	/* Force memory writes to complete before releasing descriptor */
>  	dma_wmb();
> -	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> +	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_XDP_BUF_SIZE));
>  }
>  
>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> @@ -3881,7 +3884,8 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	if (!data)
>  		return NULL;
>  
> -	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> +	mapping = dma_map_page(d, data, XDP_PACKET_HEADROOM,
> +			R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE);
>  	if (unlikely(dma_mapping_error(d, mapping))) {
>  		netdev_err(tp->dev, "Failed to map RX DMA!\n");
>  		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
> @@ -3901,19 +3905,28 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
>  	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
>  		dma_unmap_page(tp_to_dev(tp),
>  			       le64_to_cpu(tp->RxDescArray[i].addr),
> -			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> +			       R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE);
>  		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
>  		tp->Rx_databuff[i] = NULL;
>  		tp->RxDescArray[i].addr = 0;
>  		tp->RxDescArray[i].opts1 = 0;
>  	}
>  	tp->rtl_xdp = NULL;
> +	xdp_rxq_info_unreg(&tp->rtl_rxq);
>  }
>  
>  static int rtl8169_rx_fill(struct rtl8169_private *tp)
>  {
>  	int i;
>  
> +	if (xdp_rxq_info_reg(&tp->rtl_rxq, tp->dev, 0, 0))
> +		return -1;

Again, if you need an errno, use an errno.

> +
> +	if (xdp_rxq_info_reg_mem_model(&tp->rtl_rxq, MEM_TYPE_PAGE_SHARED, NULL)) {
> +		xdp_rxq_info_unreg(&tp->rtl_rxq);
> +		return -1;
> +	}
> +
>  	for (i = 0; i < NUM_RX_DESC; i++) {
>  		struct page *data;
>  
> @@ -4513,24 +4526,50 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
>  		skb_checksum_none_assert(skb);
>  }
>  
> +struct page *rtl8169_realloc_rx_data(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	struct page *data = NULL;
> +
> +	do {
> +		data = rtl8169_alloc_rx_data(tp, desc);
> +	} while (data == NULL);
> +
> +	return data;
> +}
> +
>  static struct sk_buff *rtl8619_run_xdp(struct rtl8169_private *tp, struct bpf_prog *xdp_prog,
> -				void *rx_buf, unsigned int pkt_size)
> +				void *rx_buf, unsigned int pkt_size, unsigned int entry)
>  {
> -	int result = R8169_XDP_PASS;
> +	int err, result = R8169_XDP_PASS;
>  	struct xdp_buff xdp;
>  	u32 act;
> +	struct RxDesc *desc = tp->RxDescArray + entry;
>  
> -	xdp.data = rx_buf;
> -	xdp.data_end = xdp.data + pkt_size;
> -	xdp_set_data_meta_invalid(&xdp);
> +	xdp_init_buff(&xdp, R8169_RX_XDP_BUF_SIZE, &tp->rtl_rxq);
> +	xdp_prepare_buff(&xdp, (unsigned char *)rx_buf, XDP_PACKET_HEADROOM, pkt_size, true);
> +	prefetchw(xdp.data_hard_start);
>  
>  	act = bpf_prog_run_xdp(xdp_prog, &xdp);
>  	switch (act) {
>  	case XDP_PASS:
>  		break;
>  	case XDP_TX:
> -	case XDP_REDIRECT:
>  		goto out_failure;
> +	case XDP_REDIRECT:
> +		/*unmap dma page*/
> +		dma_unmap_page(tp_to_dev(tp), le64_to_cpu(desc->addr),
> +			R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE);
> +		err = xdp_do_redirect(tp->dev, &xdp, xdp_prog);
> +		if (unlikely(err)) {
> +			/*free page*/
> +			__free_pages(tp->Rx_databuff[entry], get_order(R8169_RX_BUF_SIZE));
> +			/*realloc*/
> +			tp->Rx_databuff[entry] = rtl8169_realloc_rx_data(tp, desc);

Why this reallocation?

> +			goto out_failure;
> +		} else {
> +			result = R8169_XDP_REDIR;
> +		}
> +		break;
>  	default:
>  		bpf_warn_invalid_xdp_action(act);
>  		fallthrough;
> @@ -4551,6 +4590,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  	struct device *d = tp_to_dev(tp);
>  	int count;
>  	struct bpf_prog *xdp_prog;
> +	unsigned int xdp_xmit = 0;
>  
>  	for (count = 0; count < budget; count++, tp->cur_rx++) {
>  		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
> @@ -4607,9 +4647,18 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		//Determine whether to execute xdp
>  		xdp_prog = READ_ONCE(tp->rtl_xdp);
>  		if (xdp_prog) {
> -			skb = rtl8619_run_xdp(tp, xdp_prog, (void *)rx_buf, pkt_size);
> +			skb = rtl8619_run_xdp(tp, xdp_prog, (void *)rx_buf, pkt_size, entry);
>  			if (IS_ERR(skb)) {
> -				dev->stats.rx_dropped++;
> +				unsigned int xdp_res = -PTR_ERR(skb);
> +
> +				if (xdp_res & R8169_XDP_REDIR) {
> +					tp->Rx_databuff[entry] = rtl8169_realloc_rx_data(tp, desc);

Why this reallocation?

> +					xdp_xmit |= xdp_res;
> +					dev->stats.rx_packets++;

Why not use dev_sw_netstats_rx_add() like in standard path?

> +					dev->stats.rx_bytes += pkt_size;
> +				} else {
> +					dev->stats.rx_dropped++;
> +				}
>  				goto release_descriptor;
>  			}
>  		}
> @@ -4620,7 +4669,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb_copy_to_linear_data(skb, rx_buf, pkt_size);
> +		skb_copy_to_linear_data(skb, rx_buf + XDP_PACKET_HEADROOM, pkt_size);
>  		skb->tail += pkt_size;
>  		skb->len = pkt_size;
>  		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
> @@ -4640,6 +4689,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		rtl8169_mark_to_asic(desc);
>  	}
>  
> +	if (xdp_xmit & R8169_XDP_REDIR)
> +		xdp_do_flush();
> +
>  	return count;
>  }
>  
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ